intel-iommu: Check for an RMRR which ends before it starts.
drivers/pci/intel-iommu.c
ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
38717946 36#include <linux/iova.h>
5d450806 37#include <linux/iommu.h>
38717946 38#include <linux/intel-iommu.h>
f59c7b69 39#include <linux/sysdev.h>
69575d38 40#include <linux/tboot.h>
adb2fe02 41#include <linux/dmi.h>
ba395927 42#include <asm/cacheflush.h>
46a7fa27 43#include <asm/iommu.h>
ba395927
KA
44#include "pci.h"
45
5b6985ce
FY
46#define ROOT_SIZE VTD_PAGE_SIZE
47#define CONTEXT_SIZE VTD_PAGE_SIZE
48
ba395927
KA
49#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
50#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 51#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
52
53#define IOAPIC_RANGE_START (0xfee00000)
54#define IOAPIC_RANGE_END (0xfeefffff)
55#define IOVA_START_ADDR (0x1000)
56
57#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
58
4ed0d3e6
FY
59#define MAX_AGAW_WIDTH 64
60
2ebe3151
DW
61#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
62#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
63
64/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
65 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
66#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
67 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
68#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 69
f27be03b 70#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 71#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 72#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 73
fd18de50 74
dd4e8319
DW
75/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
76 are never going to work. */
77static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
78{
79 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
80}
81
82static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
83{
84 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
85}
86static inline unsigned long page_to_dma_pfn(struct page *pg)
87{
88 return mm_to_dma_pfn(page_to_pfn(pg));
89}
90static inline unsigned long virt_to_dma_pfn(void *p)
91{
92 return page_to_dma_pfn(virt_to_page(p));
93}
94
d9630fe9
WH
95/* global iommu list, set NULL for ignored DMAR units */
96static struct intel_iommu **g_iommus;
97
e0fc7e0b 98static void __init check_tylersburg_isoch(void);
9af88143
DW
99static int rwbf_quirk;
100
46b08e1a
MM
101/*
102 * 0: Present
103 * 1-11: Reserved
104 * 12-63: Context Ptr (12 - (haw-1))
105 * 64-127: Reserved
106 */
107struct root_entry {
108 u64 val;
109 u64 rsvd1;
110};
111#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
112static inline bool root_present(struct root_entry *root)
113{
114 return (root->val & 1);
115}
116static inline void set_root_present(struct root_entry *root)
117{
118 root->val |= 1;
119}
120static inline void set_root_value(struct root_entry *root, unsigned long value)
121{
122 root->val |= value & VTD_PAGE_MASK;
123}
124
125static inline struct context_entry *
126get_context_addr_from_root(struct root_entry *root)
127{
128 return (struct context_entry *)
129 (root_present(root)?phys_to_virt(
130 root->val & VTD_PAGE_MASK) :
131 NULL);
132}
133
7a8fc25e
MM
134/*
135 * low 64 bits:
136 * 0: present
137 * 1: fault processing disable
138 * 2-3: translation type
139 * 12-63: address space root
140 * high 64 bits:
141 * 0-2: address width
142 * 3-6: aval
143 * 8-23: domain id
144 */
145struct context_entry {
146 u64 lo;
147 u64 hi;
148};
c07e7d21
MM
149
150static inline bool context_present(struct context_entry *context)
151{
152 return (context->lo & 1);
153}
154static inline void context_set_present(struct context_entry *context)
155{
156 context->lo |= 1;
157}
158
159static inline void context_set_fault_enable(struct context_entry *context)
160{
161 context->lo &= (((u64)-1) << 2) | 1;
162}
163
c07e7d21
MM
164static inline void context_set_translation_type(struct context_entry *context,
165 unsigned long value)
166{
167 context->lo &= (((u64)-1) << 4) | 3;
168 context->lo |= (value & 3) << 2;
169}
170
171static inline void context_set_address_root(struct context_entry *context,
172 unsigned long value)
173{
174 context->lo |= value & VTD_PAGE_MASK;
175}
176
177static inline void context_set_address_width(struct context_entry *context,
178 unsigned long value)
179{
180 context->hi |= value & 7;
181}
182
183static inline void context_set_domain_id(struct context_entry *context,
184 unsigned long value)
185{
186 context->hi |= (value & ((1 << 16) - 1)) << 8;
187}
188
189static inline void context_clear_entry(struct context_entry *context)
190{
191 context->lo = 0;
192 context->hi = 0;
193}
7a8fc25e 194
622ba12a
MM
195/*
196 * 0: readable
197 * 1: writable
198 * 2-6: reserved
199 * 7: super page
9cf06697
SY
200 * 8-10: available
201 * 11: snoop behavior
622ba12a
MM
 202 * 12-63: Host physical address
203 */
204struct dma_pte {
205 u64 val;
206};
622ba12a 207
19c239ce
MM
208static inline void dma_clear_pte(struct dma_pte *pte)
209{
210 pte->val = 0;
211}
212
213static inline void dma_set_pte_readable(struct dma_pte *pte)
214{
215 pte->val |= DMA_PTE_READ;
216}
217
218static inline void dma_set_pte_writable(struct dma_pte *pte)
219{
220 pte->val |= DMA_PTE_WRITE;
221}
222
9cf06697
SY
223static inline void dma_set_pte_snp(struct dma_pte *pte)
224{
225 pte->val |= DMA_PTE_SNP;
226}
227
19c239ce
MM
228static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
229{
230 pte->val = (pte->val & ~3) | (prot & 3);
231}
232
233static inline u64 dma_pte_addr(struct dma_pte *pte)
234{
c85994e4
DW
235#ifdef CONFIG_64BIT
236 return pte->val & VTD_PAGE_MASK;
237#else
238 /* Must have a full atomic 64-bit read */
239 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
240#endif
19c239ce
MM
241}
242
dd4e8319 243static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
19c239ce 244{
dd4e8319 245 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
19c239ce
MM
246}
247
248static inline bool dma_pte_present(struct dma_pte *pte)
249{
250 return (pte->val & 3) != 0;
251}
622ba12a 252
75e6bf96
DW
253static inline int first_pte_in_page(struct dma_pte *pte)
254{
255 return !((unsigned long)pte & ~VTD_PAGE_MASK);
256}
257
2c2e2c38
FY
258/*
 259 * This domain is a static identity mapping domain.
 260 * 1. This domain creates a static 1:1 mapping to all usable memory.
 261 * 2. It maps to each iommu if successful.
 262 * 3. Each iommu maps to this domain if successful.
263 */
19943b0e
DW
264static struct dmar_domain *si_domain;
265static int hw_pass_through = 1;
2c2e2c38 266
3b5410e7 267/* devices under the same p2p bridge are owned in one domain */
cdc7b837 268#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 269
1ce28feb
WH
 270/* domain represents a virtual machine; more than one device
 271 * across iommus may be owned in one domain, e.g. a kvm guest.
272 */
273#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
274
2c2e2c38
FY
 275/* si_domain contains multiple devices */
276#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
277
99126f7c
MM
278struct dmar_domain {
279 int id; /* domain id */
4c923d47 280 int nid; /* node id */
8c11e798 281 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
99126f7c
MM
282
283 struct list_head devices; /* all devices' list */
284 struct iova_domain iovad; /* iova's that belong to this domain */
285
286 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
287 int gaw; /* max guest address width */
288
289 /* adjusted guest address width, 0 is level 2 30-bit */
290 int agaw;
291
3b5410e7 292 int flags; /* flags to find out type of domain */
8e604097
WH
293
294 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 295 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d
WH
296 int iommu_count; /* reference count of iommu */
297 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 298 u64 max_addr; /* maximum mapped address */
99126f7c
MM
299};
300
a647dacb
MM
301/* PCI domain-device relationship */
302struct device_domain_info {
303 struct list_head link; /* link to domain siblings */
304 struct list_head global; /* link to global list */
276dbf99
DW
305 int segment; /* PCI domain */
306 u8 bus; /* PCI bus number */
a647dacb
MM
307 u8 devfn; /* PCI devfn number */
308 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
93a23a72 309 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
310 struct dmar_domain *domain; /* pointer to domain */
311};
312
5e0d2a6f 313static void flush_unmaps_timeout(unsigned long data);
314
315DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
316
80b20dd8 317#define HIGH_WATER_MARK 250
318struct deferred_flush_tables {
319 int next;
320 struct iova *iova[HIGH_WATER_MARK];
321 struct dmar_domain *domain[HIGH_WATER_MARK];
322};
323
324static struct deferred_flush_tables *deferred_flush;
325
5e0d2a6f 326/* bitmap for indexing intel_iommus */
5e0d2a6f 327static int g_num_of_iommus;
328
329static DEFINE_SPINLOCK(async_umap_flush_lock);
330static LIST_HEAD(unmaps_to_do);
331
332static int timer_on;
333static long list_size;
5e0d2a6f 334
ba395927
KA
335static void domain_remove_dev_info(struct dmar_domain *domain);
336
0cd5c3c8
KM
337#ifdef CONFIG_DMAR_DEFAULT_ON
338int dmar_disabled = 0;
339#else
340int dmar_disabled = 1;
341#endif /*CONFIG_DMAR_DEFAULT_ON*/
342
ba395927 343static int __initdata dmar_map_gfx = 1;
7d3b03ce 344static int dmar_forcedac;
5e0d2a6f 345static int intel_iommu_strict;
ba395927
KA
346
347#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
348static DEFINE_SPINLOCK(device_domain_lock);
349static LIST_HEAD(device_domain_list);
350
a8bcbb0d
JR
351static struct iommu_ops intel_iommu_ops;
352
ba395927
KA
353static int __init intel_iommu_setup(char *str)
354{
355 if (!str)
356 return -EINVAL;
357 while (*str) {
0cd5c3c8
KM
358 if (!strncmp(str, "on", 2)) {
359 dmar_disabled = 0;
360 printk(KERN_INFO "Intel-IOMMU: enabled\n");
361 } else if (!strncmp(str, "off", 3)) {
ba395927 362 dmar_disabled = 1;
0cd5c3c8 363 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
364 } else if (!strncmp(str, "igfx_off", 8)) {
365 dmar_map_gfx = 0;
366 printk(KERN_INFO
367 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 368 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 369 printk(KERN_INFO
7d3b03ce
KA
370 "Intel-IOMMU: Forcing DAC for PCI devices\n");
371 dmar_forcedac = 1;
5e0d2a6f 372 } else if (!strncmp(str, "strict", 6)) {
373 printk(KERN_INFO
374 "Intel-IOMMU: disable batched IOTLB flush\n");
375 intel_iommu_strict = 1;
ba395927
KA
376 }
377
378 str += strcspn(str, ",");
379 while (*str == ',')
380 str++;
381 }
382 return 0;
383}
384__setup("intel_iommu=", intel_iommu_setup);
385
386static struct kmem_cache *iommu_domain_cache;
387static struct kmem_cache *iommu_devinfo_cache;
388static struct kmem_cache *iommu_iova_cache;
389
eb3fa7cb
KA
390static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
391{
392 unsigned int flags;
393 void *vaddr;
394
395 /* trying to avoid low memory issues */
396 flags = current->flags & PF_MEMALLOC;
397 current->flags |= PF_MEMALLOC;
398 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
399 current->flags &= (~PF_MEMALLOC | flags);
400 return vaddr;
401}
402
403
4c923d47 404static inline void *alloc_pgtable_page(int node)
ba395927 405{
eb3fa7cb 406 unsigned int flags;
4c923d47
SS
407 struct page *page;
408 void *vaddr = NULL;
eb3fa7cb
KA
409
410 /* trying to avoid low memory issues */
411 flags = current->flags & PF_MEMALLOC;
412 current->flags |= PF_MEMALLOC;
4c923d47
SS
413 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
414 if (page)
415 vaddr = page_address(page);
eb3fa7cb
KA
416 current->flags &= (~PF_MEMALLOC | flags);
417 return vaddr;
ba395927
KA
418}
419
420static inline void free_pgtable_page(void *vaddr)
421{
422 free_page((unsigned long)vaddr);
423}
424
425static inline void *alloc_domain_mem(void)
426{
eb3fa7cb 427 return iommu_kmem_cache_alloc(iommu_domain_cache);
ba395927
KA
428}
429
38717946 430static void free_domain_mem(void *vaddr)
ba395927
KA
431{
432 kmem_cache_free(iommu_domain_cache, vaddr);
433}
434
435static inline void * alloc_devinfo_mem(void)
436{
eb3fa7cb 437 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
ba395927
KA
438}
439
440static inline void free_devinfo_mem(void *vaddr)
441{
442 kmem_cache_free(iommu_devinfo_cache, vaddr);
443}
444
445struct iova *alloc_iova_mem(void)
446{
eb3fa7cb 447 return iommu_kmem_cache_alloc(iommu_iova_cache);
ba395927
KA
448}
449
450void free_iova_mem(struct iova *iova)
451{
452 kmem_cache_free(iommu_iova_cache, iova);
453}
454
1b573683
WH
455
456static inline int width_to_agaw(int width);
457
4ed0d3e6 458static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
459{
460 unsigned long sagaw;
461 int agaw = -1;
462
463 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 464 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
465 agaw >= 0; agaw--) {
466 if (test_bit(agaw, &sagaw))
467 break;
468 }
469
470 return agaw;
471}
472
4ed0d3e6
FY
473/*
474 * Calculate max SAGAW for each iommu.
475 */
476int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
477{
478 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
479}
480
481/*
 482 * Calculate agaw for each iommu.
 483 * "SAGAW" may be different across iommus; use a default agaw, and
 484 * fall back to a smaller supported agaw for iommus that don't support it.
485 */
486int iommu_calculate_agaw(struct intel_iommu *iommu)
487{
488 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
489}
490
2c2e2c38 491/* This function only returns a single iommu in a domain */
8c11e798
WH
492static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
493{
494 int iommu_id;
495
2c2e2c38 496 /* si_domain and vm domain should not get here. */
1ce28feb 497 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 498 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 499
8c11e798
WH
500 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
501 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
502 return NULL;
503
504 return g_iommus[iommu_id];
505}
506
8e604097
WH
507static void domain_update_iommu_coherency(struct dmar_domain *domain)
508{
509 int i;
510
511 domain->iommu_coherency = 1;
512
513 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
514 for (; i < g_num_of_iommus; ) {
515 if (!ecap_coherent(g_iommus[i]->ecap)) {
516 domain->iommu_coherency = 0;
517 break;
518 }
519 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
520 }
521}
522
58c610bd
SY
523static void domain_update_iommu_snooping(struct dmar_domain *domain)
524{
525 int i;
526
527 domain->iommu_snooping = 1;
528
529 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
530 for (; i < g_num_of_iommus; ) {
531 if (!ecap_sc_support(g_iommus[i]->ecap)) {
532 domain->iommu_snooping = 0;
533 break;
534 }
535 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
536 }
537}
538
539/* Some capabilities may be different across iommus */
540static void domain_update_iommu_cap(struct dmar_domain *domain)
541{
542 domain_update_iommu_coherency(domain);
543 domain_update_iommu_snooping(domain);
544}
545
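/*
 * Walk the DRHD units and return the IOMMU whose scope covers the given
 * PCI segment/bus/devfn: either an explicit device match, a bridge whose
 * secondary bus range contains the bus, or an include_all unit.
 * Returns NULL if no unit claims the device.
 */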
276dbf99 546static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
c7151a8d
WH
547{
548 struct dmar_drhd_unit *drhd = NULL;
549 int i;
550
551 for_each_drhd_unit(drhd) {
552 if (drhd->ignored)
553 continue;
276dbf99
DW
554 if (segment != drhd->segment)
555 continue;
c7151a8d 556
924b6231 557 for (i = 0; i < drhd->devices_cnt; i++) {
288e4877
DH
558 if (drhd->devices[i] &&
559 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
560 drhd->devices[i]->devfn == devfn)
561 return drhd->iommu;
4958c5dc
DW
562 if (drhd->devices[i] &&
563 drhd->devices[i]->subordinate &&
924b6231
DW
564 drhd->devices[i]->subordinate->number <= bus &&
565 drhd->devices[i]->subordinate->subordinate >= bus)
566 return drhd->iommu;
567 }
c7151a8d
WH
568
569 if (drhd->include_all)
570 return drhd->iommu;
571 }
572
573 return NULL;
574}
575
5331fe6f
WH
576static void domain_flush_cache(struct dmar_domain *domain,
577 void *addr, int size)
578{
579 if (!domain->iommu_coherency)
580 clflush_cache_range(addr, size);
581}
582
ba395927
KA
583/* Gets context entry for a given bus and devfn */
584static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
585 u8 bus, u8 devfn)
586{
587 struct root_entry *root;
588 struct context_entry *context;
589 unsigned long phy_addr;
590 unsigned long flags;
591
592 spin_lock_irqsave(&iommu->lock, flags);
593 root = &iommu->root_entry[bus];
594 context = get_context_addr_from_root(root);
595 if (!context) {
4c923d47
SS
596 context = (struct context_entry *)
597 alloc_pgtable_page(iommu->node);
ba395927
KA
598 if (!context) {
599 spin_unlock_irqrestore(&iommu->lock, flags);
600 return NULL;
601 }
5b6985ce 602 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
603 phy_addr = virt_to_phys((void *)context);
604 set_root_value(root, phy_addr);
605 set_root_present(root);
606 __iommu_flush_cache(iommu, root, sizeof(*root));
607 }
608 spin_unlock_irqrestore(&iommu->lock, flags);
609 return &context[devfn];
610}
611
612static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
613{
614 struct root_entry *root;
615 struct context_entry *context;
616 int ret;
617 unsigned long flags;
618
619 spin_lock_irqsave(&iommu->lock, flags);
620 root = &iommu->root_entry[bus];
621 context = get_context_addr_from_root(root);
622 if (!context) {
623 ret = 0;
624 goto out;
625 }
c07e7d21 626 ret = context_present(&context[devfn]);
ba395927
KA
627out:
628 spin_unlock_irqrestore(&iommu->lock, flags);
629 return ret;
630}
631
632static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
633{
634 struct root_entry *root;
635 struct context_entry *context;
636 unsigned long flags;
637
638 spin_lock_irqsave(&iommu->lock, flags);
639 root = &iommu->root_entry[bus];
640 context = get_context_addr_from_root(root);
641 if (context) {
c07e7d21 642 context_clear_entry(&context[devfn]);
ba395927
KA
643 __iommu_flush_cache(iommu, &context[devfn], \
644 sizeof(*context));
645 }
646 spin_unlock_irqrestore(&iommu->lock, flags);
647}
648
649static void free_context_table(struct intel_iommu *iommu)
650{
651 struct root_entry *root;
652 int i;
653 unsigned long flags;
654 struct context_entry *context;
655
656 spin_lock_irqsave(&iommu->lock, flags);
657 if (!iommu->root_entry) {
658 goto out;
659 }
660 for (i = 0; i < ROOT_ENTRY_NR; i++) {
661 root = &iommu->root_entry[i];
662 context = get_context_addr_from_root(root);
663 if (context)
664 free_pgtable_page(context);
665 }
666 free_pgtable_page(iommu->root_entry);
667 iommu->root_entry = NULL;
668out:
669 spin_unlock_irqrestore(&iommu->lock, flags);
670}
671
672/* page table handling */
673#define LEVEL_STRIDE (9)
674#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
675
676static inline int agaw_to_level(int agaw)
677{
678 return agaw + 2;
679}
680
681static inline int agaw_to_width(int agaw)
682{
683 return 30 + agaw * LEVEL_STRIDE;
684
685}
686
687static inline int width_to_agaw(int width)
688{
689 return (width - 30) / LEVEL_STRIDE;
690}
691
692static inline unsigned int level_to_offset_bits(int level)
693{
6660c63a 694 return (level - 1) * LEVEL_STRIDE;
ba395927
KA
695}
696
77dfa56c 697static inline int pfn_level_offset(unsigned long pfn, int level)
ba395927 698{
6660c63a 699 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
ba395927
KA
700}
701
6660c63a 702static inline unsigned long level_mask(int level)
ba395927 703{
6660c63a 704 return -1UL << level_to_offset_bits(level);
ba395927
KA
705}
706
6660c63a 707static inline unsigned long level_size(int level)
ba395927 708{
6660c63a 709 return 1UL << level_to_offset_bits(level);
ba395927
KA
710}
711
6660c63a 712static inline unsigned long align_to_level(unsigned long pfn, int level)
ba395927 713{
6660c63a 714 return (pfn + level_size(level) - 1) & level_mask(level);
ba395927
KA
715}
716
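/*
 * Walk (and build, if necessary) the page table down to the level-1 PTE
 * for the given DMA pfn.  Missing intermediate page-table pages are
 * allocated and installed with cmpxchg64, so a concurrent walker's
 * allocation is reused rather than leaked.
 */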
b026fd28
DW
717static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
718 unsigned long pfn)
ba395927 719{
b026fd28 720 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
721 struct dma_pte *parent, *pte = NULL;
722 int level = agaw_to_level(domain->agaw);
723 int offset;
ba395927
KA
724
725 BUG_ON(!domain->pgd);
b026fd28 726 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
ba395927
KA
727 parent = domain->pgd;
728
ba395927
KA
729 while (level > 0) {
730 void *tmp_page;
731
b026fd28 732 offset = pfn_level_offset(pfn, level);
ba395927
KA
733 pte = &parent[offset];
734 if (level == 1)
735 break;
736
19c239ce 737 if (!dma_pte_present(pte)) {
c85994e4
DW
738 uint64_t pteval;
739
4c923d47 740 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 741
206a73c1 742 if (!tmp_page)
ba395927 743 return NULL;
206a73c1 744
c85994e4 745 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 746 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
747 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
748 /* Someone else set it while we were thinking; use theirs. */
749 free_pgtable_page(tmp_page);
750 } else {
751 dma_pte_addr(pte);
752 domain_flush_cache(domain, pte, sizeof(*pte));
753 }
ba395927 754 }
19c239ce 755 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
756 level--;
757 }
758
ba395927
KA
759 return pte;
760}
761
762/* return address's pte at specific level */
90dcfb5e
DW
763static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
764 unsigned long pfn,
765 int level)
ba395927
KA
766{
767 struct dma_pte *parent, *pte = NULL;
768 int total = agaw_to_level(domain->agaw);
769 int offset;
770
771 parent = domain->pgd;
772 while (level <= total) {
90dcfb5e 773 offset = pfn_level_offset(pfn, total);
ba395927
KA
774 pte = &parent[offset];
775 if (level == total)
776 return pte;
777
19c239ce 778 if (!dma_pte_present(pte))
ba395927 779 break;
19c239ce 780 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
781 total--;
782 }
783 return NULL;
784}
785
ba395927 786/* clear last level pte, a tlb flush should be followed */
595badf5
DW
787static void dma_pte_clear_range(struct dmar_domain *domain,
788 unsigned long start_pfn,
789 unsigned long last_pfn)
ba395927 790{
04b18e65 791 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
310a5ab9 792 struct dma_pte *first_pte, *pte;
66eae846 793
04b18e65 794 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 795 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 796 BUG_ON(start_pfn > last_pfn);
ba395927 797
04b18e65 798 /* we don't need lock here; nobody else touches the iova range */
59c36286 799 do {
310a5ab9
DW
800 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
801 if (!pte) {
802 start_pfn = align_to_level(start_pfn + 1, 2);
803 continue;
804 }
75e6bf96 805 do {
310a5ab9
DW
806 dma_clear_pte(pte);
807 start_pfn++;
808 pte++;
75e6bf96
DW
809 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
810
310a5ab9
DW
811 domain_flush_cache(domain, first_pte,
812 (void *)pte - (void *)first_pte);
59c36286
DW
813
814 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
815}
816
817/* free page table pages. last level pte should already be cleared */
818static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
819 unsigned long start_pfn,
820 unsigned long last_pfn)
ba395927 821{
6660c63a 822 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
f3a0a52f 823 struct dma_pte *first_pte, *pte;
ba395927
KA
824 int total = agaw_to_level(domain->agaw);
825 int level;
6660c63a 826 unsigned long tmp;
ba395927 827
6660c63a
DW
828 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
829 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 830 BUG_ON(start_pfn > last_pfn);
ba395927 831
f3a0a52f 832 /* We don't need lock here; nobody else touches the iova range */
ba395927
KA
833 level = 2;
834 while (level <= total) {
6660c63a
DW
835 tmp = align_to_level(start_pfn, level);
836
f3a0a52f 837 /* If we can't even clear one PTE at this level, we're done */
6660c63a 838 if (tmp + level_size(level) - 1 > last_pfn)
ba395927
KA
839 return;
840
59c36286 841 do {
f3a0a52f
DW
842 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
843 if (!pte) {
844 tmp = align_to_level(tmp + 1, level + 1);
845 continue;
846 }
75e6bf96 847 do {
6a43e574
DW
848 if (dma_pte_present(pte)) {
849 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
850 dma_clear_pte(pte);
851 }
f3a0a52f
DW
852 pte++;
853 tmp += level_size(level);
75e6bf96
DW
854 } while (!first_pte_in_page(pte) &&
855 tmp + level_size(level) - 1 <= last_pfn);
856
f3a0a52f
DW
857 domain_flush_cache(domain, first_pte,
858 (void *)pte - (void *)first_pte);
859
59c36286 860 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
ba395927
KA
861 level++;
862 }
863 /* free pgd */
d794dc9b 864 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
865 free_pgtable_page(domain->pgd);
866 domain->pgd = NULL;
867 }
868}
869
870/* iommu handling */
871static int iommu_alloc_root_entry(struct intel_iommu *iommu)
872{
873 struct root_entry *root;
874 unsigned long flags;
875
4c923d47 876 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
877 if (!root)
878 return -ENOMEM;
879
5b6985ce 880 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
881
882 spin_lock_irqsave(&iommu->lock, flags);
883 iommu->root_entry = root;
884 spin_unlock_irqrestore(&iommu->lock, flags);
885
886 return 0;
887}
888
ba395927
KA
889static void iommu_set_root_entry(struct intel_iommu *iommu)
890{
891 void *addr;
c416daa9 892 u32 sts;
ba395927
KA
893 unsigned long flag;
894
895 addr = iommu->root_entry;
896
897 spin_lock_irqsave(&iommu->register_lock, flag);
898 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
899
c416daa9 900 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
901
902 /* Make sure hardware complete it */
903 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 904 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927
KA
905
906 spin_unlock_irqrestore(&iommu->register_lock, flag);
907}
908
909static void iommu_flush_write_buffer(struct intel_iommu *iommu)
910{
911 u32 val;
912 unsigned long flag;
913
9af88143 914 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 915 return;
ba395927
KA
916
917 spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 918 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
919
920 /* Make sure hardware complete it */
921 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 922 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927
KA
923
924 spin_unlock_irqrestore(&iommu->register_lock, flag);
925}
926
927/* return value determine if we need a write buffer flush */
4c25a2c1
DW
928static void __iommu_flush_context(struct intel_iommu *iommu,
929 u16 did, u16 source_id, u8 function_mask,
930 u64 type)
ba395927
KA
931{
932 u64 val = 0;
933 unsigned long flag;
934
ba395927
KA
935 switch (type) {
936 case DMA_CCMD_GLOBAL_INVL:
937 val = DMA_CCMD_GLOBAL_INVL;
938 break;
939 case DMA_CCMD_DOMAIN_INVL:
940 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
941 break;
942 case DMA_CCMD_DEVICE_INVL:
943 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
944 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
945 break;
946 default:
947 BUG();
948 }
949 val |= DMA_CCMD_ICC;
950
951 spin_lock_irqsave(&iommu->register_lock, flag);
952 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
953
954 /* Make sure hardware complete it */
955 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
956 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
957
958 spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
959}
960
ba395927 961/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
962static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
963 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
964{
965 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
966 u64 val = 0, val_iva = 0;
967 unsigned long flag;
968
ba395927
KA
969 switch (type) {
970 case DMA_TLB_GLOBAL_FLUSH:
971 /* global flush doesn't need set IVA_REG */
972 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
973 break;
974 case DMA_TLB_DSI_FLUSH:
975 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
976 break;
977 case DMA_TLB_PSI_FLUSH:
978 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
979 /* Note: always flush non-leaf currently */
980 val_iva = size_order | addr;
981 break;
982 default:
983 BUG();
984 }
985 /* Note: set drain read/write */
986#if 0
987 /*
988 * This is probably to be super secure.. Looks like we can
989 * ignore it without any impact.
990 */
991 if (cap_read_drain(iommu->cap))
992 val |= DMA_TLB_READ_DRAIN;
993#endif
994 if (cap_write_drain(iommu->cap))
995 val |= DMA_TLB_WRITE_DRAIN;
996
997 spin_lock_irqsave(&iommu->register_lock, flag);
998 /* Note: Only uses first TLB reg currently */
999 if (val_iva)
1000 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1001 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1002
1003 /* Make sure hardware complete it */
1004 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1005 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1006
1007 spin_unlock_irqrestore(&iommu->register_lock, flag);
1008
1009 /* check IOTLB invalidation granularity */
1010 if (DMA_TLB_IAIG(val) == 0)
1011 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1012 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1013 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1014 (unsigned long long)DMA_TLB_IIRG(type),
1015 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1016}
1017
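/*
 * Return the device_domain_info for (segment, bus, devfn) if both the
 * IOMMU (Device-IOTLB ecap plus queued invalidation) and the PCI device
 * (ATS capability, matching ATSR unit) can support device IOTLB;
 * otherwise return NULL.
 */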
93a23a72
YZ
1018static struct device_domain_info *iommu_support_dev_iotlb(
1019 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1020{
1021 int found = 0;
1022 unsigned long flags;
1023 struct device_domain_info *info;
1024 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1025
1026 if (!ecap_dev_iotlb_support(iommu->ecap))
1027 return NULL;
1028
1029 if (!iommu->qi)
1030 return NULL;
1031
1032 spin_lock_irqsave(&device_domain_lock, flags);
1033 list_for_each_entry(info, &domain->devices, link)
1034 if (info->bus == bus && info->devfn == devfn) {
1035 found = 1;
1036 break;
1037 }
1038 spin_unlock_irqrestore(&device_domain_lock, flags);
1039
1040 if (!found || !info->dev)
1041 return NULL;
1042
1043 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1044 return NULL;
1045
1046 if (!dmar_find_matched_atsr_unit(info->dev))
1047 return NULL;
1048
1049 info->iommu = iommu;
1050
1051 return info;
1052}
1053
1054static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1055{
93a23a72
YZ
1056 if (!info)
1057 return;
1058
1059 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1060}
1061
1062static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1063{
1064 if (!info->dev || !pci_ats_enabled(info->dev))
1065 return;
1066
1067 pci_disable_ats(info->dev);
1068}
1069
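/*
 * Issue a device-IOTLB (ATS) invalidation for every ATS-enabled device
 * attached to the domain.
 */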
1070static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1071 u64 addr, unsigned mask)
1072{
1073 u16 sid, qdep;
1074 unsigned long flags;
1075 struct device_domain_info *info;
1076
1077 spin_lock_irqsave(&device_domain_lock, flags);
1078 list_for_each_entry(info, &domain->devices, link) {
1079 if (!info->dev || !pci_ats_enabled(info->dev))
1080 continue;
1081
1082 sid = info->bus << 8 | info->devfn;
1083 qdep = pci_ats_queue_depth(info->dev);
1084 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1085 }
1086 spin_unlock_irqrestore(&device_domain_lock, flags);
1087}
1088
1f0ef2aa 1089static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
03d6a246 1090 unsigned long pfn, unsigned int pages)
ba395927 1091{
9dd2fe89 1092 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1093 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1094
ba395927
KA
1095 BUG_ON(pages == 0);
1096
ba395927 1097 /*
9dd2fe89
YZ
1098 * Fallback to domain selective flush if no PSI support or the size is
1099 * too big.
ba395927
KA
1100 * PSI requires page size to be 2 ^ x, and the base address is naturally
1101 * aligned to the size
1102 */
9dd2fe89
YZ
1103 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1104 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1105 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1106 else
1107 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1108 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1109
1110 /*
1111 * In caching mode, domain ID 0 is reserved for non-present to present
1112 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1113 */
1114 if (!cap_caching_mode(iommu->cap) || did)
93a23a72 1115 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1116}
1117
f8bab735 1118static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1119{
1120 u32 pmen;
1121 unsigned long flags;
1122
1123 spin_lock_irqsave(&iommu->register_lock, flags);
1124 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1125 pmen &= ~DMA_PMEN_EPM;
1126 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1127
1128 /* wait for the protected region status bit to clear */
1129 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1130 readl, !(pmen & DMA_PMEN_PRS), pmen);
1131
1132 spin_unlock_irqrestore(&iommu->register_lock, flags);
1133}
1134
ba395927
KA
1135static int iommu_enable_translation(struct intel_iommu *iommu)
1136{
1137 u32 sts;
1138 unsigned long flags;
1139
1140 spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1141 iommu->gcmd |= DMA_GCMD_TE;
1142 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1143
1144 /* Make sure hardware complete it */
1145 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1146 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1147
ba395927
KA
1148 spin_unlock_irqrestore(&iommu->register_lock, flags);
1149 return 0;
1150}
1151
1152static int iommu_disable_translation(struct intel_iommu *iommu)
1153{
1154 u32 sts;
1155 unsigned long flag;
1156
1157 spin_lock_irqsave(&iommu->register_lock, flag);
1158 iommu->gcmd &= ~DMA_GCMD_TE;
1159 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1160
1161 /* Make sure hardware complete it */
1162 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1163 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927
KA
1164
1165 spin_unlock_irqrestore(&iommu->register_lock, flag);
1166 return 0;
1167}
1168
3460a6d9 1169
ba395927
KA
1170static int iommu_init_domains(struct intel_iommu *iommu)
1171{
1172 unsigned long ndomains;
1173 unsigned long nlongs;
1174
1175 ndomains = cap_ndoms(iommu->cap);
 1176 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1177 nlongs = BITS_TO_LONGS(ndomains);
1178
94a91b50
DD
1179 spin_lock_init(&iommu->lock);
1180
ba395927
KA
1181 /* TBD: there might be 64K domains,
1182 * consider other allocation for future chip
1183 */
1184 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1185 if (!iommu->domain_ids) {
1186 printk(KERN_ERR "Allocating domain id array failed\n");
1187 return -ENOMEM;
1188 }
1189 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1190 GFP_KERNEL);
1191 if (!iommu->domains) {
1192 printk(KERN_ERR "Allocating domain array failed\n");
ba395927
KA
1193 return -ENOMEM;
1194 }
1195
1196 /*
1197 * if Caching mode is set, then invalid translations are tagged
1198 * with domainid 0. Hence we need to pre-allocate it.
1199 */
1200 if (cap_caching_mode(iommu->cap))
1201 set_bit(0, iommu->domain_ids);
1202 return 0;
1203}
ba395927 1204
ba395927
KA
1205
1206static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1207static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1208
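/*
 * Tear down all state associated with one IOMMU: release every domain
 * still attached to it (destroying domains whose last reference this was),
 * disable translation, free its interrupt, the domain arrays and the
 * context table, and drop it from g_iommus.
 */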
1209void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1210{
1211 struct dmar_domain *domain;
1212 int i;
c7151a8d 1213 unsigned long flags;
ba395927 1214
94a91b50
DD
1215 if ((iommu->domains) && (iommu->domain_ids)) {
1216 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1217 for (; i < cap_ndoms(iommu->cap); ) {
1218 domain = iommu->domains[i];
1219 clear_bit(i, iommu->domain_ids);
1220
1221 spin_lock_irqsave(&domain->iommu_lock, flags);
1222 if (--domain->iommu_count == 0) {
1223 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1224 vm_domain_exit(domain);
1225 else
1226 domain_exit(domain);
1227 }
1228 spin_unlock_irqrestore(&domain->iommu_lock, flags);
c7151a8d 1229
94a91b50
DD
1230 i = find_next_bit(iommu->domain_ids,
1231 cap_ndoms(iommu->cap), i+1);
5e98c4b1 1232 }
ba395927
KA
1233 }
1234
1235 if (iommu->gcmd & DMA_GCMD_TE)
1236 iommu_disable_translation(iommu);
1237
1238 if (iommu->irq) {
1239 set_irq_data(iommu->irq, NULL);
1240 /* This will mask the irq */
1241 free_irq(iommu->irq, iommu);
1242 destroy_irq(iommu->irq);
1243 }
1244
1245 kfree(iommu->domains);
1246 kfree(iommu->domain_ids);
1247
d9630fe9
WH
1248 g_iommus[iommu->seq_id] = NULL;
1249
1250 /* if all iommus are freed, free g_iommus */
1251 for (i = 0; i < g_num_of_iommus; i++) {
1252 if (g_iommus[i])
1253 break;
1254 }
1255
1256 if (i == g_num_of_iommus)
1257 kfree(g_iommus);
1258
ba395927
KA
1259 /* free context mapping */
1260 free_context_table(iommu);
ba395927
KA
1261}
1262
2c2e2c38 1263static struct dmar_domain *alloc_domain(void)
ba395927 1264{
ba395927 1265 struct dmar_domain *domain;
ba395927
KA
1266
1267 domain = alloc_domain_mem();
1268 if (!domain)
1269 return NULL;
1270
4c923d47 1271 domain->nid = -1;
2c2e2c38
FY
1272 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1273 domain->flags = 0;
1274
1275 return domain;
1276}
1277
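/*
 * Allocate a free domain id on this IOMMU for the domain and record the
 * association in iommu->domains and the domain's iommu bitmap.
 */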
1278static int iommu_attach_domain(struct dmar_domain *domain,
1279 struct intel_iommu *iommu)
1280{
1281 int num;
1282 unsigned long ndomains;
1283 unsigned long flags;
1284
ba395927
KA
1285 ndomains = cap_ndoms(iommu->cap);
1286
1287 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1288
ba395927
KA
1289 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1290 if (num >= ndomains) {
1291 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1292 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1293 return -ENOMEM;
ba395927
KA
1294 }
1295
ba395927 1296 domain->id = num;
2c2e2c38 1297 set_bit(num, iommu->domain_ids);
8c11e798 1298 set_bit(iommu->seq_id, &domain->iommu_bmp);
ba395927
KA
1299 iommu->domains[num] = domain;
1300 spin_unlock_irqrestore(&iommu->lock, flags);
1301
2c2e2c38 1302 return 0;
ba395927
KA
1303}
1304
2c2e2c38
FY
1305static void iommu_detach_domain(struct dmar_domain *domain,
1306 struct intel_iommu *iommu)
ba395927
KA
1307{
1308 unsigned long flags;
2c2e2c38
FY
1309 int num, ndomains;
1310 int found = 0;
ba395927 1311
8c11e798 1312 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38
FY
1313 ndomains = cap_ndoms(iommu->cap);
1314 num = find_first_bit(iommu->domain_ids, ndomains);
1315 for (; num < ndomains; ) {
1316 if (iommu->domains[num] == domain) {
1317 found = 1;
1318 break;
1319 }
1320 num = find_next_bit(iommu->domain_ids,
1321 cap_ndoms(iommu->cap), num+1);
1322 }
1323
1324 if (found) {
1325 clear_bit(num, iommu->domain_ids);
1326 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1327 iommu->domains[num] = NULL;
1328 }
8c11e798 1329 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1330}
1331
1332static struct iova_domain reserved_iova_list;
8a443df4 1333static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1334
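/*
 * Build the global list of IOVA ranges that must never be handed out for
 * DMA: the IOAPIC MMIO window and every PCI device's MMIO resources
 * (to avoid peer-to-peer DMA hitting them).
 */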
1335static void dmar_init_reserved_ranges(void)
1336{
1337 struct pci_dev *pdev = NULL;
1338 struct iova *iova;
1339 int i;
ba395927 1340
f661197e 1341 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1342
8a443df4
MG
1343 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1344 &reserved_rbtree_key);
1345
ba395927
KA
1346 /* IOAPIC ranges shouldn't be accessed by DMA */
1347 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1348 IOVA_PFN(IOAPIC_RANGE_END));
1349 if (!iova)
1350 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1351
1352 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1353 for_each_pci_dev(pdev) {
1354 struct resource *r;
1355
1356 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1357 r = &pdev->resource[i];
1358 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1359 continue;
1a4a4551
DW
1360 iova = reserve_iova(&reserved_iova_list,
1361 IOVA_PFN(r->start),
1362 IOVA_PFN(r->end));
ba395927
KA
1363 if (!iova)
1364 printk(KERN_ERR "Reserve iova failed\n");
1365 }
1366 }
1367
1368}
1369
1370static void domain_reserve_special_ranges(struct dmar_domain *domain)
1371{
1372 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1373}
1374
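/*
 * Round a guest address width up to the nearest width the page-table
 * layout can represent (12 bits plus a multiple of the 9-bit level
 * stride), capped at 64 bits.
 */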
1375static inline int guestwidth_to_adjustwidth(int gaw)
1376{
1377 int agaw;
1378 int r = (gaw - 12) % 9;
1379
1380 if (r == 0)
1381 agaw = gaw;
1382 else
1383 agaw = gaw + 9 - r;
1384 if (agaw > 64)
1385 agaw = 64;
1386 return agaw;
1387}
1388
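/*
 * Initialize a newly attached domain: set up its IOVA allocator and
 * reserved ranges, derive gaw/agaw from the IOMMU's capabilities, record
 * coherency/snooping features, and allocate the top-level page directory.
 */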
1389static int domain_init(struct dmar_domain *domain, int guest_width)
1390{
1391 struct intel_iommu *iommu;
1392 int adjust_width, agaw;
1393 unsigned long sagaw;
1394
f661197e 1395 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1396 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1397
1398 domain_reserve_special_ranges(domain);
1399
1400 /* calculate AGAW */
8c11e798 1401 iommu = domain_get_iommu(domain);
ba395927
KA
1402 if (guest_width > cap_mgaw(iommu->cap))
1403 guest_width = cap_mgaw(iommu->cap);
1404 domain->gaw = guest_width;
1405 adjust_width = guestwidth_to_adjustwidth(guest_width);
1406 agaw = width_to_agaw(adjust_width);
1407 sagaw = cap_sagaw(iommu->cap);
1408 if (!test_bit(agaw, &sagaw)) {
1409 /* hardware doesn't support it, choose a bigger one */
1410 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1411 agaw = find_next_bit(&sagaw, 5, agaw);
1412 if (agaw >= 5)
1413 return -ENODEV;
1414 }
1415 domain->agaw = agaw;
1416 INIT_LIST_HEAD(&domain->devices);
1417
8e604097
WH
1418 if (ecap_coherent(iommu->ecap))
1419 domain->iommu_coherency = 1;
1420 else
1421 domain->iommu_coherency = 0;
1422
58c610bd
SY
1423 if (ecap_sc_support(iommu->ecap))
1424 domain->iommu_snooping = 1;
1425 else
1426 domain->iommu_snooping = 0;
1427
c7151a8d 1428 domain->iommu_count = 1;
4c923d47 1429 domain->nid = iommu->node;
c7151a8d 1430
ba395927 1431 /* always allocate the top pgd */
4c923d47 1432 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1433 if (!domain->pgd)
1434 return -ENOMEM;
5b6985ce 1435 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1436 return 0;
1437}
1438
1439static void domain_exit(struct dmar_domain *domain)
1440{
2c2e2c38
FY
1441 struct dmar_drhd_unit *drhd;
1442 struct intel_iommu *iommu;
ba395927
KA
1443
 1444 /* Domain 0 is reserved, so don't process it */
1445 if (!domain)
1446 return;
1447
1448 domain_remove_dev_info(domain);
1449 /* destroy iovas */
1450 put_iova_domain(&domain->iovad);
ba395927
KA
1451
1452 /* clear ptes */
595badf5 1453 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1454
1455 /* free page tables */
d794dc9b 1456 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1457
2c2e2c38
FY
1458 for_each_active_iommu(iommu, drhd)
1459 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1460 iommu_detach_domain(domain, iommu);
1461
ba395927
KA
1462 free_domain_mem(domain);
1463}
1464
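/*
 * Install the context entry for (segment, bus, devfn) so that DMA from
 * that device is translated through this domain's page tables (or passed
 * through, per 'translation'), then flush the context/IOTLB caches as
 * required by caching mode.
 */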
4ed0d3e6
FY
1465static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1466 u8 bus, u8 devfn, int translation)
ba395927
KA
1467{
1468 struct context_entry *context;
ba395927 1469 unsigned long flags;
5331fe6f 1470 struct intel_iommu *iommu;
ea6606b0
WH
1471 struct dma_pte *pgd;
1472 unsigned long num;
1473 unsigned long ndomains;
1474 int id;
1475 int agaw;
93a23a72 1476 struct device_domain_info *info = NULL;
ba395927
KA
1477
1478 pr_debug("Set context mapping for %02x:%02x.%d\n",
1479 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1480
ba395927 1481 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1482 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1483 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1484
276dbf99 1485 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1486 if (!iommu)
1487 return -ENODEV;
1488
ba395927
KA
1489 context = device_to_context_entry(iommu, bus, devfn);
1490 if (!context)
1491 return -ENOMEM;
1492 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1493 if (context_present(context)) {
ba395927
KA
1494 spin_unlock_irqrestore(&iommu->lock, flags);
1495 return 0;
1496 }
1497
ea6606b0
WH
1498 id = domain->id;
1499 pgd = domain->pgd;
1500
2c2e2c38
FY
1501 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1502 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1503 int found = 0;
1504
1505 /* find an available domain id for this device in iommu */
1506 ndomains = cap_ndoms(iommu->cap);
1507 num = find_first_bit(iommu->domain_ids, ndomains);
1508 for (; num < ndomains; ) {
1509 if (iommu->domains[num] == domain) {
1510 id = num;
1511 found = 1;
1512 break;
1513 }
1514 num = find_next_bit(iommu->domain_ids,
1515 cap_ndoms(iommu->cap), num+1);
1516 }
1517
1518 if (found == 0) {
1519 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1520 if (num >= ndomains) {
1521 spin_unlock_irqrestore(&iommu->lock, flags);
1522 printk(KERN_ERR "IOMMU: no free domain ids\n");
1523 return -EFAULT;
1524 }
1525
1526 set_bit(num, iommu->domain_ids);
1527 iommu->domains[num] = domain;
1528 id = num;
1529 }
1530
1531 /* Skip top levels of page tables for
1532 * iommu which has less agaw than default.
1533 */
1534 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1535 pgd = phys_to_virt(dma_pte_addr(pgd));
1536 if (!dma_pte_present(pgd)) {
1537 spin_unlock_irqrestore(&iommu->lock, flags);
1538 return -ENOMEM;
1539 }
1540 }
1541 }
1542
1543 context_set_domain_id(context, id);
4ed0d3e6 1544
93a23a72
YZ
1545 if (translation != CONTEXT_TT_PASS_THROUGH) {
1546 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1547 translation = info ? CONTEXT_TT_DEV_IOTLB :
1548 CONTEXT_TT_MULTI_LEVEL;
1549 }
4ed0d3e6
FY
1550 /*
1551 * In pass through mode, AW must be programmed to indicate the largest
1552 * AGAW value supported by hardware. And ASR is ignored by hardware.
1553 */
93a23a72 1554 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1555 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1556 else {
1557 context_set_address_root(context, virt_to_phys(pgd));
1558 context_set_address_width(context, iommu->agaw);
1559 }
4ed0d3e6
FY
1560
1561 context_set_translation_type(context, translation);
c07e7d21
MM
1562 context_set_fault_enable(context);
1563 context_set_present(context);
5331fe6f 1564 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1565
4c25a2c1
DW
1566 /*
 1567 * It's a non-present to present mapping. If hardware doesn't cache
 1568 * non-present entries we only need to flush the write-buffer. If it
 1569 * _does_ cache non-present entries, then it does so in the special
1570 * domain #0, which we have to flush:
1571 */
1572 if (cap_caching_mode(iommu->cap)) {
1573 iommu->flush.flush_context(iommu, 0,
1574 (((u16)bus) << 8) | devfn,
1575 DMA_CCMD_MASK_NOBIT,
1576 DMA_CCMD_DEVICE_INVL);
1f0ef2aa 1577 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1578 } else {
ba395927 1579 iommu_flush_write_buffer(iommu);
4c25a2c1 1580 }
93a23a72 1581 iommu_enable_dev_iotlb(info);
ba395927 1582 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1583
1584 spin_lock_irqsave(&domain->iommu_lock, flags);
1585 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1586 domain->iommu_count++;
4c923d47
SS
1587 if (domain->iommu_count == 1)
1588 domain->nid = iommu->node;
58c610bd 1589 domain_update_iommu_cap(domain);
c7151a8d
WH
1590 }
1591 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1592 return 0;
1593}
1594
1595static int
4ed0d3e6
FY
1596domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1597 int translation)
ba395927
KA
1598{
1599 int ret;
1600 struct pci_dev *tmp, *parent;
1601
276dbf99 1602 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1603 pdev->bus->number, pdev->devfn,
1604 translation);
ba395927
KA
1605 if (ret)
1606 return ret;
1607
1608 /* dependent device mapping */
1609 tmp = pci_find_upstream_pcie_bridge(pdev);
1610 if (!tmp)
1611 return 0;
1612 /* Secondary interface's bus number and devfn 0 */
1613 parent = pdev->bus->self;
1614 while (parent != tmp) {
276dbf99
DW
1615 ret = domain_context_mapping_one(domain,
1616 pci_domain_nr(parent->bus),
1617 parent->bus->number,
4ed0d3e6 1618 parent->devfn, translation);
ba395927
KA
1619 if (ret)
1620 return ret;
1621 parent = parent->bus->self;
1622 }
1623 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1624 return domain_context_mapping_one(domain,
276dbf99 1625 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1626 tmp->subordinate->number, 0,
1627 translation);
ba395927
KA
1628 else /* this is a legacy PCI bridge */
1629 return domain_context_mapping_one(domain,
276dbf99
DW
1630 pci_domain_nr(tmp->bus),
1631 tmp->bus->number,
4ed0d3e6
FY
1632 tmp->devfn,
1633 translation);
ba395927
KA
1634}
1635
5331fe6f 1636static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1637{
1638 int ret;
1639 struct pci_dev *tmp, *parent;
5331fe6f
WH
1640 struct intel_iommu *iommu;
1641
276dbf99
DW
1642 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1643 pdev->devfn);
5331fe6f
WH
1644 if (!iommu)
1645 return -ENODEV;
ba395927 1646
276dbf99 1647 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1648 if (!ret)
1649 return ret;
1650 /* dependent device mapping */
1651 tmp = pci_find_upstream_pcie_bridge(pdev);
1652 if (!tmp)
1653 return ret;
1654 /* Secondary interface's bus number and devfn 0 */
1655 parent = pdev->bus->self;
1656 while (parent != tmp) {
8c11e798 1657 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1658 parent->devfn);
ba395927
KA
1659 if (!ret)
1660 return ret;
1661 parent = parent->bus->self;
1662 }
1663 if (tmp->is_pcie)
276dbf99
DW
1664 return device_context_mapped(iommu, tmp->subordinate->number,
1665 0);
ba395927 1666 else
276dbf99
DW
1667 return device_context_mapped(iommu, tmp->bus->number,
1668 tmp->devfn);
ba395927
KA
1669}
1670
f532959b
FY
1671/* Returns a number of VTD pages, but aligned to MM page size */
1672static inline unsigned long aligned_nrpages(unsigned long host_addr,
1673 size_t size)
1674{
1675 host_addr &= ~PAGE_MASK;
1676 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1677}
1678
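/*
 * Create mappings for nr_pages VT-d pages starting at iov_pfn, taking the
 * physical pages either from a scatterlist (sg != NULL) or from a
 * contiguous run beginning at phys_pfn.  PTEs are filled in place and
 * flushed one page-table page at a time.
 */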
9051aa02
DW
1679static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1680 struct scatterlist *sg, unsigned long phys_pfn,
1681 unsigned long nr_pages, int prot)
e1605495
DW
1682{
1683 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1684 phys_addr_t uninitialized_var(pteval);
e1605495 1685 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1686 unsigned long sg_res;
e1605495
DW
1687
1688 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1689
1690 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1691 return -EINVAL;
1692
1693 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1694
9051aa02
DW
1695 if (sg)
1696 sg_res = 0;
1697 else {
1698 sg_res = nr_pages + 1;
1699 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1700 }
1701
e1605495 1702 while (nr_pages--) {
c85994e4
DW
1703 uint64_t tmp;
1704
e1605495 1705 if (!sg_res) {
f532959b 1706 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1707 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1708 sg->dma_length = sg->length;
1709 pteval = page_to_phys(sg_page(sg)) | prot;
1710 }
1711 if (!pte) {
1712 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1713 if (!pte)
1714 return -ENOMEM;
1715 }
1716 /* We don't need lock here, nobody else
1717 * touches the iova range
1718 */
7766a3fb 1719 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1720 if (tmp) {
1bf20f0d 1721 static int dumps = 5;
c85994e4
DW
1722 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1723 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1724 if (dumps) {
1725 dumps--;
1726 debug_dma_dump_mappings(NULL);
1727 }
1728 WARN_ON(1);
1729 }
e1605495 1730 pte++;
75e6bf96 1731 if (!nr_pages || first_pte_in_page(pte)) {
e1605495
DW
1732 domain_flush_cache(domain, first_pte,
1733 (void *)pte - (void *)first_pte);
1734 pte = NULL;
1735 }
1736 iov_pfn++;
1737 pteval += VTD_PAGE_SIZE;
1738 sg_res--;
1739 if (!sg_res)
1740 sg = sg_next(sg);
1741 }
1742 return 0;
1743}
1744
9051aa02
DW
1745static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1746 struct scatterlist *sg, unsigned long nr_pages,
1747 int prot)
ba395927 1748{
9051aa02
DW
1749 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1750}
6f6a00e4 1751
9051aa02
DW
1752static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1753 unsigned long phys_pfn, unsigned long nr_pages,
1754 int prot)
1755{
1756 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1757}
1758
c7151a8d 1759static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1760{
c7151a8d
WH
1761 if (!iommu)
1762 return;
8c11e798
WH
1763
1764 clear_context_table(iommu, bus, devfn);
1765 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1766 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1767 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1768}
1769
1770static void domain_remove_dev_info(struct dmar_domain *domain)
1771{
1772 struct device_domain_info *info;
1773 unsigned long flags;
c7151a8d 1774 struct intel_iommu *iommu;
ba395927
KA
1775
1776 spin_lock_irqsave(&device_domain_lock, flags);
1777 while (!list_empty(&domain->devices)) {
1778 info = list_entry(domain->devices.next,
1779 struct device_domain_info, link);
1780 list_del(&info->link);
1781 list_del(&info->global);
1782 if (info->dev)
358dd8ac 1783 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1784 spin_unlock_irqrestore(&device_domain_lock, flags);
1785
93a23a72 1786 iommu_disable_dev_iotlb(info);
276dbf99 1787 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1788 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1789 free_devinfo_mem(info);
1790
1791 spin_lock_irqsave(&device_domain_lock, flags);
1792 }
1793 spin_unlock_irqrestore(&device_domain_lock, flags);
1794}
1795
1796/*
1797 * find_domain
358dd8ac 1798 * Note: struct pci_dev->dev.archdata.iommu stores the device_domain_info
ba395927 1799 */
38717946 1800static struct dmar_domain *
ba395927
KA
1801find_domain(struct pci_dev *pdev)
1802{
1803 struct device_domain_info *info;
1804
1805 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1806 info = pdev->dev.archdata.iommu;
ba395927
KA
1807 if (info)
1808 return info->domain;
1809 return NULL;
1810}
1811
ba395927
KA
1812/* domain is initialized */
1813static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1814{
1815 struct dmar_domain *domain, *found = NULL;
1816 struct intel_iommu *iommu;
1817 struct dmar_drhd_unit *drhd;
1818 struct device_domain_info *info, *tmp;
1819 struct pci_dev *dev_tmp;
1820 unsigned long flags;
1821 int bus = 0, devfn = 0;
276dbf99 1822 int segment;
2c2e2c38 1823 int ret;
ba395927
KA
1824
1825 domain = find_domain(pdev);
1826 if (domain)
1827 return domain;
1828
276dbf99
DW
1829 segment = pci_domain_nr(pdev->bus);
1830
ba395927
KA
1831 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1832 if (dev_tmp) {
1833 if (dev_tmp->is_pcie) {
1834 bus = dev_tmp->subordinate->number;
1835 devfn = 0;
1836 } else {
1837 bus = dev_tmp->bus->number;
1838 devfn = dev_tmp->devfn;
1839 }
1840 spin_lock_irqsave(&device_domain_lock, flags);
1841 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1842 if (info->segment == segment &&
1843 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1844 found = info->domain;
1845 break;
1846 }
1847 }
1848 spin_unlock_irqrestore(&device_domain_lock, flags);
 1849 /* pcie-pci bridge already has a domain, use it */
1850 if (found) {
1851 domain = found;
1852 goto found_domain;
1853 }
1854 }
1855
2c2e2c38
FY
1856 domain = alloc_domain();
1857 if (!domain)
1858 goto error;
1859
ba395927
KA
1860 /* Allocate new domain for the device */
1861 drhd = dmar_find_matched_drhd_unit(pdev);
1862 if (!drhd) {
1863 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1864 pci_name(pdev));
1865 return NULL;
1866 }
1867 iommu = drhd->iommu;
1868
2c2e2c38
FY
1869 ret = iommu_attach_domain(domain, iommu);
1870 if (ret) {
1871 domain_exit(domain);
ba395927 1872 goto error;
2c2e2c38 1873 }
ba395927
KA
1874
1875 if (domain_init(domain, gaw)) {
1876 domain_exit(domain);
1877 goto error;
1878 }
1879
1880 /* register pcie-to-pci device */
1881 if (dev_tmp) {
1882 info = alloc_devinfo_mem();
1883 if (!info) {
1884 domain_exit(domain);
1885 goto error;
1886 }
276dbf99 1887 info->segment = segment;
ba395927
KA
1888 info->bus = bus;
1889 info->devfn = devfn;
1890 info->dev = NULL;
1891 info->domain = domain;
 1892 /* This domain is shared by devices under the p2p bridge */
3b5410e7 1893 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1894
 1895 /* pcie-to-pci bridge already has a domain, use it */
1896 found = NULL;
1897 spin_lock_irqsave(&device_domain_lock, flags);
1898 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
1899 if (tmp->segment == segment &&
1900 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
1901 found = tmp->domain;
1902 break;
1903 }
1904 }
1905 if (found) {
1906 free_devinfo_mem(info);
1907 domain_exit(domain);
1908 domain = found;
1909 } else {
1910 list_add(&info->link, &domain->devices);
1911 list_add(&info->global, &device_domain_list);
1912 }
1913 spin_unlock_irqrestore(&device_domain_lock, flags);
1914 }
1915
1916found_domain:
1917 info = alloc_devinfo_mem();
1918 if (!info)
1919 goto error;
276dbf99 1920 info->segment = segment;
ba395927
KA
1921 info->bus = pdev->bus->number;
1922 info->devfn = pdev->devfn;
1923 info->dev = pdev;
1924 info->domain = domain;
1925 spin_lock_irqsave(&device_domain_lock, flags);
 1926 /* somebody else was faster and already set it up */
1927 found = find_domain(pdev);
1928 if (found != NULL) {
1929 spin_unlock_irqrestore(&device_domain_lock, flags);
1930 if (found != domain) {
1931 domain_exit(domain);
1932 domain = found;
1933 }
1934 free_devinfo_mem(info);
1935 return domain;
1936 }
1937 list_add(&info->link, &domain->devices);
1938 list_add(&info->global, &device_domain_list);
358dd8ac 1939 pdev->dev.archdata.iommu = info;
ba395927
KA
1940 spin_unlock_irqrestore(&device_domain_lock, flags);
1941 return domain;
1942error:
1943 /* recheck it here, maybe others set it */
1944 return find_domain(pdev);
1945}
1946
2c2e2c38 1947static int iommu_identity_mapping;
e0fc7e0b
DW
1948#define IDENTMAP_ALL 1
1949#define IDENTMAP_GFX 2
1950#define IDENTMAP_AZALIA 4
2c2e2c38 1951
b213203e
DW
1952static int iommu_domain_identity_map(struct dmar_domain *domain,
1953 unsigned long long start,
1954 unsigned long long end)
ba395927 1955{
c5395d5c
DW
1956 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1957 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1958
1959 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1960 dma_to_mm_pfn(last_vpfn))) {
ba395927 1961 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 1962 return -ENOMEM;
ba395927
KA
1963 }
1964
c5395d5c
DW
1965 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1966 start, end, domain->id);
ba395927
KA
1967 /*
 1968 * The RMRR range might overlap the physical memory range;
 1969 * clear it first
1970 */
c5395d5c 1971 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 1972
c5395d5c
DW
1973 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1974 last_vpfn - first_vpfn + 1,
61df7443 1975 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
1976}
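/*
 * Editor's note (worked example, assuming 4KiB VT-d pages): with
 * start = 0x000e0000 and end = 0x00100000 the helper above computes
 * first_vpfn = 0xe0 and last_vpfn = 0x100, reserves that IOVA range, clears
 * any PTEs already covering it, and then maps
 * last_vpfn - first_vpfn + 1 = 0x21 pages 1:1 (IOVA PFN == physical PFN)
 * with read/write permission.
 */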
1977
1978static int iommu_prepare_identity_map(struct pci_dev *pdev,
1979 unsigned long long start,
1980 unsigned long long end)
1981{
1982 struct dmar_domain *domain;
1983 int ret;
1984
c7ab48d2 1985 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
1986 if (!domain)
1987 return -ENOMEM;
1988
19943b0e
DW
1989 /* For _hardware_ passthrough, don't bother. But for software
1990 passthrough, we do it anyway -- it may indicate a memory
 1991 range which is reserved in E820 and so didn't get set
 1992 up in si_domain to start with */
1993 if (domain == si_domain && hw_pass_through) {
1994 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
1995 pci_name(pdev), start, end);
1996 return 0;
1997 }
1998
1999 printk(KERN_INFO
2000 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2001 pci_name(pdev), start, end);
2ff729f5 2002
5595b528
DW
2003 if (end < start) {
2004 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2005 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2006 dmi_get_system_info(DMI_BIOS_VENDOR),
2007 dmi_get_system_info(DMI_BIOS_VERSION),
2008 dmi_get_system_info(DMI_PRODUCT_VERSION));
2009 ret = -EIO;
2010 goto error;
2011 }
2012
2ff729f5
DW
2013 if (end >> agaw_to_width(domain->agaw)) {
2014 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2015 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2016 agaw_to_width(domain->agaw),
2017 dmi_get_system_info(DMI_BIOS_VENDOR),
2018 dmi_get_system_info(DMI_BIOS_VERSION),
2019 dmi_get_system_info(DMI_PRODUCT_VERSION));
2020 ret = -EIO;
2021 goto error;
2022 }
19943b0e 2023
b213203e 2024 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2025 if (ret)
2026 goto error;
2027
2028 /* context entry init */
4ed0d3e6 2029 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2030 if (ret)
2031 goto error;
2032
2033 return 0;
2034
2035 error:
ba395927
KA
2036 domain_exit(domain);
2037 return ret;
ba395927
KA
2038}
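/*
 * Editor's illustration (hedged sketch, not part of the driver): the two
 * WARN()s above amount to a simple range validation of the BIOS-supplied
 * RMRR, which on its own could be written as the hypothetical helper below;
 * the region must not end before it starts and must fit within the domain's
 * address width:
 *
 *	static bool example_rmrr_is_sane(u64 start, u64 end, int addr_width)
 *	{
 *		return end >= start && !(end >> addr_width);
 *	}
 */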
2039
2040static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2041 struct pci_dev *pdev)
2042{
358dd8ac 2043 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2044 return 0;
2045 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2046 rmrr->end_address + 1);
2047}
2048
49a0429e
KA
2049#ifdef CONFIG_DMAR_FLOPPY_WA
2050static inline void iommu_prepare_isa(void)
2051{
2052 struct pci_dev *pdev;
2053 int ret;
2054
2055 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2056 if (!pdev)
2057 return;
2058
c7ab48d2 2059 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
49a0429e
KA
2060 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2061
2062 if (ret)
c7ab48d2
DW
2063 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2064 "floppy might not work\n");
49a0429e
KA
2065
2066}
2067#else
2068static inline void iommu_prepare_isa(void)
2069{
2070 return;
2071}
 2072#endif /* !CONFIG_DMAR_FLOPPY_WA */
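/*
 * Editor's note: the 0-16MiB window above matches the 24-bit reach of legacy
 * ISA DMA (2^24 bytes = 16MiB), which is why the floppy workaround
 * identity-maps exactly that range for the LPC bridge.
 */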
2073
2c2e2c38 2074static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2
DW
2075
2076static int __init si_domain_work_fn(unsigned long start_pfn,
2077 unsigned long end_pfn, void *datax)
2078{
2079 int *ret = datax;
2080
2081 *ret = iommu_domain_identity_map(si_domain,
2082 (uint64_t)start_pfn << PAGE_SHIFT,
2083 (uint64_t)end_pfn << PAGE_SHIFT);
2084 return *ret;
2085
2086}
2087
071e1374 2088static int __init si_domain_init(int hw)
2c2e2c38
FY
2089{
2090 struct dmar_drhd_unit *drhd;
2091 struct intel_iommu *iommu;
c7ab48d2 2092 int nid, ret = 0;
2c2e2c38
FY
2093
2094 si_domain = alloc_domain();
2095 if (!si_domain)
2096 return -EFAULT;
2097
c7ab48d2 2098 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2099
2100 for_each_active_iommu(iommu, drhd) {
2101 ret = iommu_attach_domain(si_domain, iommu);
2102 if (ret) {
2103 domain_exit(si_domain);
2104 return -EFAULT;
2105 }
2106 }
2107
2108 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2109 domain_exit(si_domain);
2110 return -EFAULT;
2111 }
2112
2113 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2114
19943b0e
DW
2115 if (hw)
2116 return 0;
2117
c7ab48d2
DW
2118 for_each_online_node(nid) {
2119 work_with_active_regions(nid, si_domain_work_fn, &ret);
2120 if (ret)
2121 return ret;
2122 }
2123
2c2e2c38
FY
2124 return 0;
2125}
2126
2127static void domain_remove_one_dev_info(struct dmar_domain *domain,
2128 struct pci_dev *pdev);
2129static int identity_mapping(struct pci_dev *pdev)
2130{
2131 struct device_domain_info *info;
2132
2133 if (likely(!iommu_identity_mapping))
2134 return 0;
2135
2136
2137 list_for_each_entry(info, &si_domain->devices, link)
2138 if (info->dev == pdev)
2139 return 1;
2140 return 0;
2141}
2142
2143static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2144 struct pci_dev *pdev,
2145 int translation)
2c2e2c38
FY
2146{
2147 struct device_domain_info *info;
2148 unsigned long flags;
5fe60f4e 2149 int ret;
2c2e2c38
FY
2150
2151 info = alloc_devinfo_mem();
2152 if (!info)
2153 return -ENOMEM;
2154
5fe60f4e
DW
2155 ret = domain_context_mapping(domain, pdev, translation);
2156 if (ret) {
2157 free_devinfo_mem(info);
2158 return ret;
2159 }
2160
2c2e2c38
FY
2161 info->segment = pci_domain_nr(pdev->bus);
2162 info->bus = pdev->bus->number;
2163 info->devfn = pdev->devfn;
2164 info->dev = pdev;
2165 info->domain = domain;
2166
2167 spin_lock_irqsave(&device_domain_lock, flags);
2168 list_add(&info->link, &domain->devices);
2169 list_add(&info->global, &device_domain_list);
2170 pdev->dev.archdata.iommu = info;
2171 spin_unlock_irqrestore(&device_domain_lock, flags);
2172
2173 return 0;
2174}
2175
6941af28
DW
2176static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2177{
e0fc7e0b
DW
2178 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2179 return 1;
2180
2181 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2182 return 1;
2183
2184 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2185 return 0;
6941af28 2186
3dfc813d
DW
2187 /*
2188 * We want to start off with all devices in the 1:1 domain, and
2189 * take them out later if we find they can't access all of memory.
2190 *
2191 * However, we can't do this for PCI devices behind bridges,
2192 * because all PCI devices behind the same bridge will end up
2193 * with the same source-id on their transactions.
2194 *
2195 * Practically speaking, we can't change things around for these
2196 * devices at run-time, because we can't be sure there'll be no
2197 * DMA transactions in flight for any of their siblings.
2198 *
2199 * So PCI devices (unless they're on the root bus) as well as
2200 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2201 * the 1:1 domain, just in _case_ one of their siblings turns out
2202 * not to be able to map all of memory.
2203 */
2204 if (!pdev->is_pcie) {
2205 if (!pci_is_root_bus(pdev->bus))
2206 return 0;
2207 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2208 return 0;
2209 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2210 return 0;
2211
2212 /*
2213 * At boot time, we don't yet know if devices will be 64-bit capable.
2214 * Assume that they will -- if they turn out not to be, then we can
2215 * take them out of the 1:1 domain later.
2216 */
6941af28
DW
2217 if (!startup)
2218 return pdev->dma_mask > DMA_BIT_MASK(32);
2219
2220 return 1;
2221}
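/*
 * Editor's note (summary of the policy above, in the editor's words): the
 * Azalia and GFX special cases are identity-mapped unconditionally when
 * their IDENTMAP_* bits are set.  With IDENTMAP_ALL, every other device is
 * identity-mapped at startup unless it is a conventional-PCI device off the
 * root bus, a conventional PCI-PCI bridge, or a PCIe-to-PCI bridge.  Once
 * dma_mask is known (startup == 0), a device only keeps its identity mapping
 * if it can address more than 32 bits, e.g. dma_mask == DMA_BIT_MASK(64)
 * keeps it in the 1:1 domain while DMA_BIT_MASK(32) takes it out.
 */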
2222
071e1374 2223static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2224{
2c2e2c38
FY
2225 struct pci_dev *pdev = NULL;
2226 int ret;
2227
19943b0e 2228 ret = si_domain_init(hw);
2c2e2c38
FY
2229 if (ret)
2230 return -EFAULT;
2231
2c2e2c38 2232 for_each_pci_dev(pdev) {
6941af28 2233 if (iommu_should_identity_map(pdev, 1)) {
19943b0e
DW
2234 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2235 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2236
5fe60f4e 2237 ret = domain_add_dev_info(si_domain, pdev,
19943b0e 2238 hw ? CONTEXT_TT_PASS_THROUGH :
62edf5dc
DW
2239 CONTEXT_TT_MULTI_LEVEL);
2240 if (ret)
2241 return ret;
62edf5dc 2242 }
2c2e2c38
FY
2243 }
2244
2245 return 0;
2246}
2247
2248int __init init_dmars(void)
ba395927
KA
2249{
2250 struct dmar_drhd_unit *drhd;
2251 struct dmar_rmrr_unit *rmrr;
2252 struct pci_dev *pdev;
2253 struct intel_iommu *iommu;
9d783ba0 2254 int i, ret;
2c2e2c38 2255
ba395927
KA
2256 /*
2257 * for each drhd
2258 * allocate root
2259 * initialize and program root entry to not present
2260 * endfor
2261 */
2262 for_each_drhd_unit(drhd) {
5e0d2a6f 2263 g_num_of_iommus++;
2264 /*
 2265 * lock not needed as this is only incremented in the single-
 2266 * threaded kernel __init code path; all other accesses are
 2267 * read only
2268 */
2269 }
2270
d9630fe9
WH
2271 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2272 GFP_KERNEL);
2273 if (!g_iommus) {
2274 printk(KERN_ERR "Allocating global iommu array failed\n");
2275 ret = -ENOMEM;
2276 goto error;
2277 }
2278
80b20dd8 2279 deferred_flush = kzalloc(g_num_of_iommus *
2280 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2281 if (!deferred_flush) {
5e0d2a6f 2282 ret = -ENOMEM;
2283 goto error;
2284 }
2285
5e0d2a6f 2286 for_each_drhd_unit(drhd) {
2287 if (drhd->ignored)
2288 continue;
1886e8a9
SS
2289
2290 iommu = drhd->iommu;
d9630fe9 2291 g_iommus[iommu->seq_id] = iommu;
ba395927 2292
e61d98d8
SS
2293 ret = iommu_init_domains(iommu);
2294 if (ret)
2295 goto error;
2296
ba395927
KA
2297 /*
2298 * TBD:
2299 * we could share the same root & context tables
 2300 * among all IOMMUs. Need to split it later.
2301 */
2302 ret = iommu_alloc_root_entry(iommu);
2303 if (ret) {
2304 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2305 goto error;
2306 }
4ed0d3e6 2307 if (!ecap_pass_through(iommu->ecap))
19943b0e 2308 hw_pass_through = 0;
ba395927
KA
2309 }
2310
1531a6a6
SS
2311 /*
2312 * Start from the sane iommu hardware state.
2313 */
a77b67d4
YS
2314 for_each_drhd_unit(drhd) {
2315 if (drhd->ignored)
2316 continue;
2317
2318 iommu = drhd->iommu;
1531a6a6
SS
2319
2320 /*
2321 * If the queued invalidation is already initialized by us
2322 * (for example, while enabling interrupt-remapping) then
2323 * we got the things already rolling from a sane state.
2324 */
2325 if (iommu->qi)
2326 continue;
2327
2328 /*
2329 * Clear any previous faults.
2330 */
2331 dmar_fault(-1, iommu);
2332 /*
2333 * Disable queued invalidation if supported and already enabled
2334 * before OS handover.
2335 */
2336 dmar_disable_qi(iommu);
2337 }
2338
2339 for_each_drhd_unit(drhd) {
2340 if (drhd->ignored)
2341 continue;
2342
2343 iommu = drhd->iommu;
2344
a77b67d4
YS
2345 if (dmar_enable_qi(iommu)) {
2346 /*
2347 * Queued Invalidate not enabled, use Register Based
2348 * Invalidate
2349 */
2350 iommu->flush.flush_context = __iommu_flush_context;
2351 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2352 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
2353 "invalidation\n",
2354 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2355 } else {
2356 iommu->flush.flush_context = qi_flush_context;
2357 iommu->flush.flush_iotlb = qi_flush_iotlb;
2358 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
2359 "invalidation\n",
2360 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2361 }
2362 }
2363
19943b0e 2364 if (iommu_pass_through)
e0fc7e0b
DW
2365 iommu_identity_mapping |= IDENTMAP_ALL;
2366
19943b0e 2367#ifdef CONFIG_DMAR_BROKEN_GFX_WA
e0fc7e0b 2368 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2369#endif
e0fc7e0b
DW
2370
2371 check_tylersburg_isoch();
2372
ba395927 2373 /*
19943b0e
DW
 2374 * If pass-through is not set or not enabled, set up context entries for
 2375 * identity mappings for RMRR, GFX and ISA, and possibly fall back to the
 2376 * static identity mapping if iommu_identity_mapping is set.
ba395927 2377 */
19943b0e
DW
2378 if (iommu_identity_mapping) {
2379 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2380 if (ret) {
19943b0e
DW
2381 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2382 goto error;
ba395927
KA
2383 }
2384 }
ba395927 2385 /*
19943b0e
DW
2386 * For each rmrr
2387 * for each dev attached to rmrr
2388 * do
2389 * locate drhd for dev, alloc domain for dev
2390 * allocate free domain
2391 * allocate page table entries for rmrr
2392 * if context not allocated for bus
2393 * allocate and init context
2394 * set present in root table for this bus
2395 * init context with domain, translation etc
2396 * endfor
2397 * endfor
ba395927 2398 */
19943b0e
DW
2399 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2400 for_each_rmrr_units(rmrr) {
2401 for (i = 0; i < rmrr->devices_cnt; i++) {
2402 pdev = rmrr->devices[i];
2403 /*
 2404 * some BIOSes list non-existent devices in the
 2405 * DMAR table.
2406 */
2407 if (!pdev)
2408 continue;
2409 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2410 if (ret)
2411 printk(KERN_ERR
2412 "IOMMU: mapping reserved region failed\n");
ba395927 2413 }
4ed0d3e6 2414 }
49a0429e 2415
19943b0e
DW
2416 iommu_prepare_isa();
2417
ba395927
KA
2418 /*
2419 * for each drhd
2420 * enable fault log
2421 * global invalidate context cache
2422 * global invalidate iotlb
2423 * enable translation
2424 */
2425 for_each_drhd_unit(drhd) {
2426 if (drhd->ignored)
2427 continue;
2428 iommu = drhd->iommu;
ba395927
KA
2429
2430 iommu_flush_write_buffer(iommu);
2431
3460a6d9
KA
2432 ret = dmar_set_interrupt(iommu);
2433 if (ret)
2434 goto error;
2435
ba395927
KA
2436 iommu_set_root_entry(iommu);
2437
4c25a2c1 2438 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2439 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2440
ba395927
KA
2441 ret = iommu_enable_translation(iommu);
2442 if (ret)
2443 goto error;
b94996c9
DW
2444
2445 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2446 }
2447
2448 return 0;
2449error:
2450 for_each_drhd_unit(drhd) {
2451 if (drhd->ignored)
2452 continue;
2453 iommu = drhd->iommu;
2454 free_iommu(iommu);
2455 }
d9630fe9 2456 kfree(g_iommus);
ba395927
KA
2457 return ret;
2458}
2459
5a5e02a6 2460/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2461static struct iova *intel_alloc_iova(struct device *dev,
2462 struct dmar_domain *domain,
2463 unsigned long nrpages, uint64_t dma_mask)
ba395927 2464{
ba395927 2465 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2466 struct iova *iova = NULL;
ba395927 2467
875764de
DW
2468 /* Restrict dma_mask to the width that the iommu can handle */
2469 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2470
2471 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2472 /*
2473 * First try to allocate an io virtual address in
284901a9 2474 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2475 * from higher range
ba395927 2476 */
875764de
DW
2477 iova = alloc_iova(&domain->iovad, nrpages,
2478 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2479 if (iova)
2480 return iova;
2481 }
2482 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2483 if (unlikely(!iova)) {
 2484 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2485 nrpages, pci_name(pdev));
f76aec76
KA
2486 return NULL;
2487 }
2488
2489 return iova;
2490}
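/*
 * Editor's note (worked example under the code's own rules): for a device
 * advertising a 64-bit dma_mask on a domain with gaw = 48, the mask is first
 * clamped to DOMAIN_MAX_ADDR(48); the allocator then tries to place the IOVA
 * below 4GiB (limit pfn IOVA_PFN(DMA_BIT_MASK(32))) and falls back to the
 * full clamped mask only if that first attempt fails.  Booting with
 * intel_iommu=forcedac skips the below-4GiB attempt entirely.
 */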
2491
147202aa 2492static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2493{
2494 struct dmar_domain *domain;
2495 int ret;
2496
2497 domain = get_domain_for_dev(pdev,
2498 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2499 if (!domain) {
2500 printk(KERN_ERR
2501 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2502 return NULL;
ba395927
KA
2503 }
2504
2505 /* make sure context mapping is ok */
5331fe6f 2506 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2507 ret = domain_context_mapping(domain, pdev,
2508 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2509 if (ret) {
2510 printk(KERN_ERR
2511 "Domain context map for %s failed",
2512 pci_name(pdev));
4fe05bbc 2513 return NULL;
f76aec76 2514 }
ba395927
KA
2515 }
2516
f76aec76
KA
2517 return domain;
2518}
2519
147202aa
DW
2520static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2521{
2522 struct device_domain_info *info;
2523
2524 /* No lock here, assumes no domain exit in normal case */
2525 info = dev->dev.archdata.iommu;
2526 if (likely(info))
2527 return info->domain;
2528
2529 return __get_valid_domain_for_dev(dev);
2530}
2531
2c2e2c38
FY
2532static int iommu_dummy(struct pci_dev *pdev)
2533{
2534 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2535}
2536
2537/* Check if the pdev needs to go through non-identity map and unmap process.*/
73676832 2538static int iommu_no_mapping(struct device *dev)
2c2e2c38 2539{
73676832 2540 struct pci_dev *pdev;
2c2e2c38
FY
2541 int found;
2542
73676832
DW
2543 if (unlikely(dev->bus != &pci_bus_type))
2544 return 1;
2545
2546 pdev = to_pci_dev(dev);
1e4c64c4
DW
2547 if (iommu_dummy(pdev))
2548 return 1;
2549
2c2e2c38 2550 if (!iommu_identity_mapping)
1e4c64c4 2551 return 0;
2c2e2c38
FY
2552
2553 found = identity_mapping(pdev);
2554 if (found) {
6941af28 2555 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2556 return 1;
2557 else {
2558 /*
 2559 * The device is only 32 bit DMA capable: remove it from
 2560 * si_domain and fall back to non-identity mapping.
2561 */
2562 domain_remove_one_dev_info(si_domain, pdev);
2563 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2564 pci_name(pdev));
2565 return 0;
2566 }
2567 } else {
2568 /*
 2569 * A 64 bit DMA device detached from a VM is put back
 2570 * into si_domain for identity mapping.
2571 */
6941af28 2572 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2573 int ret;
5fe60f4e
DW
2574 ret = domain_add_dev_info(si_domain, pdev,
2575 hw_pass_through ?
2576 CONTEXT_TT_PASS_THROUGH :
2577 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2578 if (!ret) {
2579 printk(KERN_INFO "64bit %s uses identity mapping\n",
2580 pci_name(pdev));
2581 return 1;
2582 }
2583 }
2584 }
2585
1e4c64c4 2586 return 0;
2c2e2c38
FY
2587}
2588
bb9e6d65
FT
2589static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2590 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2591{
2592 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2593 struct dmar_domain *domain;
5b6985ce 2594 phys_addr_t start_paddr;
f76aec76
KA
2595 struct iova *iova;
2596 int prot = 0;
6865f0d1 2597 int ret;
8c11e798 2598 struct intel_iommu *iommu;
33041ec0 2599 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2600
2601 BUG_ON(dir == DMA_NONE);
2c2e2c38 2602
73676832 2603 if (iommu_no_mapping(hwdev))
6865f0d1 2604 return paddr;
f76aec76
KA
2605
2606 domain = get_valid_domain_for_dev(pdev);
2607 if (!domain)
2608 return 0;
2609
8c11e798 2610 iommu = domain_get_iommu(domain);
88cb6a74 2611 size = aligned_nrpages(paddr, size);
f76aec76 2612
5a5e02a6
DW
2613 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2614 pdev->dma_mask);
f76aec76
KA
2615 if (!iova)
2616 goto error;
2617
ba395927
KA
2618 /*
 2619 * Check if DMAR supports zero-length reads on write-only
 2620 * mappings.
2621 */
2622 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2623 !cap_zlr(iommu->cap))
ba395927
KA
2624 prot |= DMA_PTE_READ;
2625 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2626 prot |= DMA_PTE_WRITE;
2627 /*
6865f0d1 2628 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 2629 * page. Note: if two part of one page are separately mapped, we
6865f0d1 2630 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
2631 * is not a big problem
2632 */
0ab36de2 2633 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2634 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2635 if (ret)
2636 goto error;
2637
1f0ef2aa
DW
2638 /* it's a non-present to present mapping. Only flush if caching mode */
2639 if (cap_caching_mode(iommu->cap))
03d6a246 2640 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
1f0ef2aa 2641 else
8c11e798 2642 iommu_flush_write_buffer(iommu);
f76aec76 2643
03d6a246
DW
2644 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2645 start_paddr += paddr & ~PAGE_MASK;
2646 return start_paddr;
ba395927 2647
ba395927 2648error:
f76aec76
KA
2649 if (iova)
2650 __free_iova(&domain->iovad, iova);
4cf2e75d 2651 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2652 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2653 return 0;
2654}
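/*
 * Editor's note (worked example, assuming 4KiB pages): mapping
 * paddr = 0x12345678 with size = 0x10 rounds up to a single VT-d page, maps
 * the whole page containing the buffer, and returns the IOVA of that page
 * plus the original sub-page offset, i.e.
 * ((phys_addr_t)iova->pfn_lo << PAGE_SHIFT) + 0x678.
 */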
2655
ffbbef5c
FT
2656static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2657 unsigned long offset, size_t size,
2658 enum dma_data_direction dir,
2659 struct dma_attrs *attrs)
bb9e6d65 2660{
ffbbef5c
FT
2661 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2662 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2663}
2664
5e0d2a6f 2665static void flush_unmaps(void)
2666{
80b20dd8 2667 int i, j;
5e0d2a6f 2668
5e0d2a6f 2669 timer_on = 0;
2670
2671 /* just flush them all */
2672 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2673 struct intel_iommu *iommu = g_iommus[i];
2674 if (!iommu)
2675 continue;
c42d9f32 2676
9dd2fe89
YZ
2677 if (!deferred_flush[i].next)
2678 continue;
2679
2680 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2681 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2682 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2683 unsigned long mask;
2684 struct iova *iova = deferred_flush[i].iova[j];
2685
64de5af0 2686 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
93a23a72 2687 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
64de5af0 2688 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
93a23a72 2689 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2690 }
9dd2fe89 2691 deferred_flush[i].next = 0;
5e0d2a6f 2692 }
2693
5e0d2a6f 2694 list_size = 0;
5e0d2a6f 2695}
2696
2697static void flush_unmaps_timeout(unsigned long data)
2698{
80b20dd8 2699 unsigned long flags;
2700
2701 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2702 flush_unmaps();
80b20dd8 2703 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2704}
2705
2706static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2707{
2708 unsigned long flags;
80b20dd8 2709 int next, iommu_id;
8c11e798 2710 struct intel_iommu *iommu;
5e0d2a6f 2711
2712 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2713 if (list_size == HIGH_WATER_MARK)
2714 flush_unmaps();
2715
8c11e798
WH
2716 iommu = domain_get_iommu(dom);
2717 iommu_id = iommu->seq_id;
c42d9f32 2718
80b20dd8 2719 next = deferred_flush[iommu_id].next;
2720 deferred_flush[iommu_id].domain[next] = dom;
2721 deferred_flush[iommu_id].iova[next] = iova;
2722 deferred_flush[iommu_id].next++;
5e0d2a6f 2723
2724 if (!timer_on) {
2725 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2726 timer_on = 1;
2727 }
2728 list_size++;
2729 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2730}
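/*
 * Editor's note (summary of the deferred-flush scheme above, in the editor's
 * words): in non-strict mode each unmap is queued on its IOMMU's
 * deferred_flush list instead of being flushed immediately; flush_unmaps()
 * then issues one global IOTLB flush per IOMMU and frees all queued IOVAs.
 * It runs either when list_size reaches HIGH_WATER_MARK or when the 10ms
 * unmap_timer armed above fires, amortising many unmaps over a single flush.
 */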
2731
ffbbef5c
FT
2732static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2733 size_t size, enum dma_data_direction dir,
2734 struct dma_attrs *attrs)
ba395927 2735{
ba395927 2736 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2737 struct dmar_domain *domain;
d794dc9b 2738 unsigned long start_pfn, last_pfn;
ba395927 2739 struct iova *iova;
8c11e798 2740 struct intel_iommu *iommu;
ba395927 2741
73676832 2742 if (iommu_no_mapping(dev))
f76aec76 2743 return;
2c2e2c38 2744
ba395927
KA
2745 domain = find_domain(pdev);
2746 BUG_ON(!domain);
2747
8c11e798
WH
2748 iommu = domain_get_iommu(domain);
2749
ba395927 2750 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2751 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2752 (unsigned long long)dev_addr))
ba395927 2753 return;
ba395927 2754
d794dc9b
DW
2755 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2756 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2757
d794dc9b
DW
2758 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2759 pci_name(pdev), start_pfn, last_pfn);
ba395927 2760
f76aec76 2761 /* clear the whole page */
d794dc9b
DW
2762 dma_pte_clear_range(domain, start_pfn, last_pfn);
2763
f76aec76 2764 /* free page tables */
d794dc9b
DW
2765 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2766
5e0d2a6f 2767 if (intel_iommu_strict) {
03d6a246 2768 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
d794dc9b 2769 last_pfn - start_pfn + 1);
5e0d2a6f 2770 /* free iova */
2771 __free_iova(&domain->iovad, iova);
2772 } else {
2773 add_unmap(domain, iova);
2774 /*
2775 * queue up the release of the unmap to save the 1/6th of the
2776 * cpu used up by the iotlb flush operation...
2777 */
5e0d2a6f 2778 }
ba395927
KA
2779}
2780
d7ab5c46
FT
2781static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2782 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2783{
2784 void *vaddr;
2785 int order;
2786
5b6985ce 2787 size = PAGE_ALIGN(size);
ba395927 2788 order = get_order(size);
e8bb910d
AW
2789
2790 if (!iommu_no_mapping(hwdev))
2791 flags &= ~(GFP_DMA | GFP_DMA32);
2792 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2793 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2794 flags |= GFP_DMA;
2795 else
2796 flags |= GFP_DMA32;
2797 }
ba395927
KA
2798
2799 vaddr = (void *)__get_free_pages(flags, order);
2800 if (!vaddr)
2801 return NULL;
2802 memset(vaddr, 0, size);
2803
bb9e6d65
FT
2804 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2805 DMA_BIDIRECTIONAL,
2806 hwdev->coherent_dma_mask);
ba395927
KA
2807 if (*dma_handle)
2808 return vaddr;
2809 free_pages((unsigned long)vaddr, order);
2810 return NULL;
2811}
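/*
 * Editor's note (worked example of the GFP juggling above): when the device
 * goes through IOMMU translation the GFP_DMA/GFP_DMA32 restrictions are
 * dropped, since the IOVA allocator already honours the DMA mask.  For an
 * identity-mapped device with coherent_dma_mask == DMA_BIT_MASK(24), on the
 * other hand, GFP_DMA is added so the physical pages themselves land below
 * 16MiB.
 */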
2812
d7ab5c46
FT
2813static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2814 dma_addr_t dma_handle)
ba395927
KA
2815{
2816 int order;
2817
5b6985ce 2818 size = PAGE_ALIGN(size);
ba395927
KA
2819 order = get_order(size);
2820
0db9b7ae 2821 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
2822 free_pages((unsigned long)vaddr, order);
2823}
2824
d7ab5c46
FT
2825static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2826 int nelems, enum dma_data_direction dir,
2827 struct dma_attrs *attrs)
ba395927 2828{
ba395927
KA
2829 struct pci_dev *pdev = to_pci_dev(hwdev);
2830 struct dmar_domain *domain;
d794dc9b 2831 unsigned long start_pfn, last_pfn;
f76aec76 2832 struct iova *iova;
8c11e798 2833 struct intel_iommu *iommu;
ba395927 2834
73676832 2835 if (iommu_no_mapping(hwdev))
ba395927
KA
2836 return;
2837
2838 domain = find_domain(pdev);
8c11e798
WH
2839 BUG_ON(!domain);
2840
2841 iommu = domain_get_iommu(domain);
ba395927 2842
c03ab37c 2843 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
2844 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2845 (unsigned long long)sglist[0].dma_address))
f76aec76 2846 return;
f76aec76 2847
d794dc9b
DW
2848 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2849 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
2850
2851 /* clear the whole page */
d794dc9b
DW
2852 dma_pte_clear_range(domain, start_pfn, last_pfn);
2853
f76aec76 2854 /* free page tables */
d794dc9b 2855 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 2856
acea0018
DW
2857 if (intel_iommu_strict) {
2858 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2859 last_pfn - start_pfn + 1);
2860 /* free iova */
2861 __free_iova(&domain->iovad, iova);
2862 } else {
2863 add_unmap(domain, iova);
2864 /*
2865 * queue up the release of the unmap to save the 1/6th of the
2866 * cpu used up by the iotlb flush operation...
2867 */
2868 }
ba395927
KA
2869}
2870
ba395927 2871static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2872 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2873{
2874 int i;
c03ab37c 2875 struct scatterlist *sg;
ba395927 2876
c03ab37c 2877 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2878 BUG_ON(!sg_page(sg));
4cf2e75d 2879 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 2880 sg->dma_length = sg->length;
ba395927
KA
2881 }
2882 return nelems;
2883}
2884
d7ab5c46
FT
2885static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2886 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 2887{
ba395927 2888 int i;
ba395927
KA
2889 struct pci_dev *pdev = to_pci_dev(hwdev);
2890 struct dmar_domain *domain;
f76aec76
KA
2891 size_t size = 0;
2892 int prot = 0;
b536d24d 2893 size_t offset_pfn = 0;
f76aec76
KA
2894 struct iova *iova = NULL;
2895 int ret;
c03ab37c 2896 struct scatterlist *sg;
b536d24d 2897 unsigned long start_vpfn;
8c11e798 2898 struct intel_iommu *iommu;
ba395927
KA
2899
2900 BUG_ON(dir == DMA_NONE);
73676832 2901 if (iommu_no_mapping(hwdev))
c03ab37c 2902 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2903
f76aec76
KA
2904 domain = get_valid_domain_for_dev(pdev);
2905 if (!domain)
2906 return 0;
2907
8c11e798
WH
2908 iommu = domain_get_iommu(domain);
2909
b536d24d 2910 for_each_sg(sglist, sg, nelems, i)
88cb6a74 2911 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 2912
5a5e02a6
DW
2913 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2914 pdev->dma_mask);
f76aec76 2915 if (!iova) {
c03ab37c 2916 sglist->dma_length = 0;
f76aec76
KA
2917 return 0;
2918 }
2919
2920 /*
 2921 * Check if DMAR supports zero-length reads on write-only
 2922 * mappings.
2923 */
2924 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2925 !cap_zlr(iommu->cap))
f76aec76
KA
2926 prot |= DMA_PTE_READ;
2927 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2928 prot |= DMA_PTE_WRITE;
2929
b536d24d 2930 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 2931
f532959b 2932 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
2933 if (unlikely(ret)) {
2934 /* clear the page */
2935 dma_pte_clear_range(domain, start_vpfn,
2936 start_vpfn + size - 1);
2937 /* free page tables */
2938 dma_pte_free_pagetable(domain, start_vpfn,
2939 start_vpfn + size - 1);
2940 /* free iova */
2941 __free_iova(&domain->iovad, iova);
2942 return 0;
ba395927
KA
2943 }
2944
1f0ef2aa
DW
2945 /* it's a non-present to present mapping. Only flush if caching mode */
2946 if (cap_caching_mode(iommu->cap))
03d6a246 2947 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
1f0ef2aa 2948 else
8c11e798 2949 iommu_flush_write_buffer(iommu);
1f0ef2aa 2950
ba395927
KA
2951 return nelems;
2952}
2953
dfb805e8
FT
2954static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
2955{
2956 return !dma_addr;
2957}
2958
160c1d8e 2959struct dma_map_ops intel_dma_ops = {
ba395927
KA
2960 .alloc_coherent = intel_alloc_coherent,
2961 .free_coherent = intel_free_coherent,
ba395927
KA
2962 .map_sg = intel_map_sg,
2963 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
2964 .map_page = intel_map_page,
2965 .unmap_page = intel_unmap_page,
dfb805e8 2966 .mapping_error = intel_mapping_error,
ba395927
KA
2967};
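/*
 * Editor's note (hedged usage sketch): once intel_iommu_init() installs this
 * structure as the global dma_ops, an ordinary driver call such as
 *
 *	dma_addr_t h = dma_map_page(&pdev->dev, page, 0, len, DMA_TO_DEVICE);
 *
 * is routed to intel_map_page() above, and dma_mapping_error(&pdev->dev, h)
 * to intel_mapping_error().
 */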
2968
2969static inline int iommu_domain_cache_init(void)
2970{
2971 int ret = 0;
2972
2973 iommu_domain_cache = kmem_cache_create("iommu_domain",
2974 sizeof(struct dmar_domain),
2975 0,
2976 SLAB_HWCACHE_ALIGN,
2977
2978 NULL);
2979 if (!iommu_domain_cache) {
2980 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2981 ret = -ENOMEM;
2982 }
2983
2984 return ret;
2985}
2986
2987static inline int iommu_devinfo_cache_init(void)
2988{
2989 int ret = 0;
2990
2991 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2992 sizeof(struct device_domain_info),
2993 0,
2994 SLAB_HWCACHE_ALIGN,
ba395927
KA
2995 NULL);
2996 if (!iommu_devinfo_cache) {
2997 printk(KERN_ERR "Couldn't create devinfo cache\n");
2998 ret = -ENOMEM;
2999 }
3000
3001 return ret;
3002}
3003
3004static inline int iommu_iova_cache_init(void)
3005{
3006 int ret = 0;
3007
3008 iommu_iova_cache = kmem_cache_create("iommu_iova",
3009 sizeof(struct iova),
3010 0,
3011 SLAB_HWCACHE_ALIGN,
ba395927
KA
3012 NULL);
3013 if (!iommu_iova_cache) {
3014 printk(KERN_ERR "Couldn't create iova cache\n");
3015 ret = -ENOMEM;
3016 }
3017
3018 return ret;
3019}
3020
3021static int __init iommu_init_mempool(void)
3022{
3023 int ret;
3024 ret = iommu_iova_cache_init();
3025 if (ret)
3026 return ret;
3027
3028 ret = iommu_domain_cache_init();
3029 if (ret)
3030 goto domain_error;
3031
3032 ret = iommu_devinfo_cache_init();
3033 if (!ret)
3034 return ret;
3035
3036 kmem_cache_destroy(iommu_domain_cache);
3037domain_error:
3038 kmem_cache_destroy(iommu_iova_cache);
3039
3040 return -ENOMEM;
3041}
3042
3043static void __init iommu_exit_mempool(void)
3044{
3045 kmem_cache_destroy(iommu_devinfo_cache);
3046 kmem_cache_destroy(iommu_domain_cache);
3047 kmem_cache_destroy(iommu_iova_cache);
3048
3049}
3050
ba395927
KA
3051static void __init init_no_remapping_devices(void)
3052{
3053 struct dmar_drhd_unit *drhd;
3054
3055 for_each_drhd_unit(drhd) {
3056 if (!drhd->include_all) {
3057 int i;
3058 for (i = 0; i < drhd->devices_cnt; i++)
3059 if (drhd->devices[i] != NULL)
3060 break;
3061 /* ignore DMAR unit if no pci devices exist */
3062 if (i == drhd->devices_cnt)
3063 drhd->ignored = 1;
3064 }
3065 }
3066
3067 if (dmar_map_gfx)
3068 return;
3069
3070 for_each_drhd_unit(drhd) {
3071 int i;
3072 if (drhd->ignored || drhd->include_all)
3073 continue;
3074
3075 for (i = 0; i < drhd->devices_cnt; i++)
3076 if (drhd->devices[i] &&
3077 !IS_GFX_DEVICE(drhd->devices[i]))
3078 break;
3079
3080 if (i < drhd->devices_cnt)
3081 continue;
3082
3083 /* bypass IOMMU if it is just for gfx devices */
3084 drhd->ignored = 1;
3085 for (i = 0; i < drhd->devices_cnt; i++) {
3086 if (!drhd->devices[i])
3087 continue;
358dd8ac 3088 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3089 }
3090 }
3091}
3092
f59c7b69
FY
3093#ifdef CONFIG_SUSPEND
3094static int init_iommu_hw(void)
3095{
3096 struct dmar_drhd_unit *drhd;
3097 struct intel_iommu *iommu = NULL;
3098
3099 for_each_active_iommu(iommu, drhd)
3100 if (iommu->qi)
3101 dmar_reenable_qi(iommu);
3102
3103 for_each_active_iommu(iommu, drhd) {
3104 iommu_flush_write_buffer(iommu);
3105
3106 iommu_set_root_entry(iommu);
3107
3108 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3109 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3110 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3111 DMA_TLB_GLOBAL_FLUSH);
f59c7b69 3112 iommu_enable_translation(iommu);
b94996c9 3113 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3114 }
3115
3116 return 0;
3117}
3118
3119static void iommu_flush_all(void)
3120{
3121 struct dmar_drhd_unit *drhd;
3122 struct intel_iommu *iommu;
3123
3124 for_each_active_iommu(iommu, drhd) {
3125 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3126 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3127 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3128 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3129 }
3130}
3131
3132static int iommu_suspend(struct sys_device *dev, pm_message_t state)
3133{
3134 struct dmar_drhd_unit *drhd;
3135 struct intel_iommu *iommu = NULL;
3136 unsigned long flag;
3137
3138 for_each_active_iommu(iommu, drhd) {
3139 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3140 GFP_ATOMIC);
3141 if (!iommu->iommu_state)
3142 goto nomem;
3143 }
3144
3145 iommu_flush_all();
3146
3147 for_each_active_iommu(iommu, drhd) {
3148 iommu_disable_translation(iommu);
3149
3150 spin_lock_irqsave(&iommu->register_lock, flag);
3151
3152 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3153 readl(iommu->reg + DMAR_FECTL_REG);
3154 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3155 readl(iommu->reg + DMAR_FEDATA_REG);
3156 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3157 readl(iommu->reg + DMAR_FEADDR_REG);
3158 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3159 readl(iommu->reg + DMAR_FEUADDR_REG);
3160
3161 spin_unlock_irqrestore(&iommu->register_lock, flag);
3162 }
3163 return 0;
3164
3165nomem:
3166 for_each_active_iommu(iommu, drhd)
3167 kfree(iommu->iommu_state);
3168
3169 return -ENOMEM;
3170}
3171
3172static int iommu_resume(struct sys_device *dev)
3173{
3174 struct dmar_drhd_unit *drhd;
3175 struct intel_iommu *iommu = NULL;
3176 unsigned long flag;
3177
3178 if (init_iommu_hw()) {
3179 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3180 return -EIO;
3181 }
3182
3183 for_each_active_iommu(iommu, drhd) {
3184
3185 spin_lock_irqsave(&iommu->register_lock, flag);
3186
3187 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3188 iommu->reg + DMAR_FECTL_REG);
3189 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3190 iommu->reg + DMAR_FEDATA_REG);
3191 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3192 iommu->reg + DMAR_FEADDR_REG);
3193 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3194 iommu->reg + DMAR_FEUADDR_REG);
3195
3196 spin_unlock_irqrestore(&iommu->register_lock, flag);
3197 }
3198
3199 for_each_active_iommu(iommu, drhd)
3200 kfree(iommu->iommu_state);
3201
3202 return 0;
3203}
3204
3205static struct sysdev_class iommu_sysclass = {
3206 .name = "iommu",
3207 .resume = iommu_resume,
3208 .suspend = iommu_suspend,
3209};
3210
3211static struct sys_device device_iommu = {
3212 .cls = &iommu_sysclass,
3213};
3214
3215static int __init init_iommu_sysfs(void)
3216{
3217 int error;
3218
3219 error = sysdev_class_register(&iommu_sysclass);
3220 if (error)
3221 return error;
3222
3223 error = sysdev_register(&device_iommu);
3224 if (error)
3225 sysdev_class_unregister(&iommu_sysclass);
3226
3227 return error;
3228}
3229
3230#else
3231static int __init init_iommu_sysfs(void)
3232{
3233 return 0;
3234}
 3235#endif /* CONFIG_SUSPEND */
3236
99dcaded
FY
3237/*
 3238 * Here we only respond to the action of a device being unbound from its driver.
3239 *
3240 * Added device is not attached to its DMAR domain here yet. That will happen
3241 * when mapping the device to iova.
3242 */
3243static int device_notifier(struct notifier_block *nb,
3244 unsigned long action, void *data)
3245{
3246 struct device *dev = data;
3247 struct pci_dev *pdev = to_pci_dev(dev);
3248 struct dmar_domain *domain;
3249
3250 domain = find_domain(pdev);
3251 if (!domain)
3252 return 0;
3253
3254 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through)
3255 domain_remove_one_dev_info(domain, pdev);
3256
3257 return 0;
3258}
3259
3260static struct notifier_block device_nb = {
3261 .notifier_call = device_notifier,
3262};
3263
ba395927
KA
3264int __init intel_iommu_init(void)
3265{
3266 int ret = 0;
a59b50e9 3267 int force_on = 0;
ba395927 3268
a59b50e9
JC
3269 /* VT-d is required for a TXT/tboot launch, so enforce that */
3270 force_on = tboot_force_iommu();
3271
3272 if (dmar_table_init()) {
3273 if (force_on)
3274 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3275 return -ENODEV;
a59b50e9 3276 }
ba395927 3277
a59b50e9
JC
3278 if (dmar_dev_scope_init()) {
3279 if (force_on)
3280 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3281 return -ENODEV;
a59b50e9 3282 }
1886e8a9 3283
2ae21010
SS
3284 /*
3285 * Check the need for DMA-remapping initialization now.
3286 * Above initialization will also be used by Interrupt-remapping.
3287 */
75f1cdf1 3288 if (no_iommu || dmar_disabled)
2ae21010
SS
3289 return -ENODEV;
3290
ba395927
KA
3291 iommu_init_mempool();
3292 dmar_init_reserved_ranges();
3293
3294 init_no_remapping_devices();
3295
3296 ret = init_dmars();
3297 if (ret) {
a59b50e9
JC
3298 if (force_on)
3299 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3300 printk(KERN_ERR "IOMMU: dmar init failed\n");
3301 put_iova_domain(&reserved_iova_list);
3302 iommu_exit_mempool();
3303 return ret;
3304 }
3305 printk(KERN_INFO
3306 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3307
5e0d2a6f 3308 init_timer(&unmap_timer);
75f1cdf1
FT
3309#ifdef CONFIG_SWIOTLB
3310 swiotlb = 0;
3311#endif
19943b0e 3312 dma_ops = &intel_dma_ops;
4ed0d3e6 3313
f59c7b69 3314 init_iommu_sysfs();
a8bcbb0d
JR
3315
3316 register_iommu(&intel_iommu_ops);
3317
99dcaded
FY
3318 bus_register_notifier(&pci_bus_type, &device_nb);
3319
ba395927
KA
3320 return 0;
3321}
e820482c 3322
3199aa6b
HW
3323static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3324 struct pci_dev *pdev)
3325{
3326 struct pci_dev *tmp, *parent;
3327
3328 if (!iommu || !pdev)
3329 return;
3330
3331 /* dependent device detach */
3332 tmp = pci_find_upstream_pcie_bridge(pdev);
3333 /* Secondary interface's bus number and devfn 0 */
3334 if (tmp) {
3335 parent = pdev->bus->self;
3336 while (parent != tmp) {
3337 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3338 parent->devfn);
3199aa6b
HW
3339 parent = parent->bus->self;
3340 }
3341 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
3342 iommu_detach_dev(iommu,
3343 tmp->subordinate->number, 0);
3344 else /* this is a legacy PCI bridge */
276dbf99
DW
3345 iommu_detach_dev(iommu, tmp->bus->number,
3346 tmp->devfn);
3199aa6b
HW
3347 }
3348}
3349
2c2e2c38 3350static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3351 struct pci_dev *pdev)
3352{
3353 struct device_domain_info *info;
3354 struct intel_iommu *iommu;
3355 unsigned long flags;
3356 int found = 0;
3357 struct list_head *entry, *tmp;
3358
276dbf99
DW
3359 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3360 pdev->devfn);
c7151a8d
WH
3361 if (!iommu)
3362 return;
3363
3364 spin_lock_irqsave(&device_domain_lock, flags);
3365 list_for_each_safe(entry, tmp, &domain->devices) {
3366 info = list_entry(entry, struct device_domain_info, link);
276dbf99 3367 /* No need to compare PCI domain; it has to be the same */
c7151a8d
WH
3368 if (info->bus == pdev->bus->number &&
3369 info->devfn == pdev->devfn) {
3370 list_del(&info->link);
3371 list_del(&info->global);
3372 if (info->dev)
3373 info->dev->dev.archdata.iommu = NULL;
3374 spin_unlock_irqrestore(&device_domain_lock, flags);
3375
93a23a72 3376 iommu_disable_dev_iotlb(info);
c7151a8d 3377 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3378 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3379 free_devinfo_mem(info);
3380
3381 spin_lock_irqsave(&device_domain_lock, flags);
3382
3383 if (found)
3384 break;
3385 else
3386 continue;
3387 }
3388
 3389 /* if there are no other devices under the same iommu
 3390 * owned by this domain, clear this iommu in iommu_bmp,
 3391 * update iommu count and coherency
3392 */
276dbf99
DW
3393 if (iommu == device_to_iommu(info->segment, info->bus,
3394 info->devfn))
c7151a8d
WH
3395 found = 1;
3396 }
3397
3398 if (found == 0) {
3399 unsigned long tmp_flags;
3400 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3401 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3402 domain->iommu_count--;
58c610bd 3403 domain_update_iommu_cap(domain);
c7151a8d
WH
3404 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3405 }
3406
3407 spin_unlock_irqrestore(&device_domain_lock, flags);
3408}
3409
3410static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3411{
3412 struct device_domain_info *info;
3413 struct intel_iommu *iommu;
3414 unsigned long flags1, flags2;
3415
3416 spin_lock_irqsave(&device_domain_lock, flags1);
3417 while (!list_empty(&domain->devices)) {
3418 info = list_entry(domain->devices.next,
3419 struct device_domain_info, link);
3420 list_del(&info->link);
3421 list_del(&info->global);
3422 if (info->dev)
3423 info->dev->dev.archdata.iommu = NULL;
3424
3425 spin_unlock_irqrestore(&device_domain_lock, flags1);
3426
93a23a72 3427 iommu_disable_dev_iotlb(info);
276dbf99 3428 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3429 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3430 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3431
3432 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3433 * and capabilities
c7151a8d
WH
3434 */
3435 spin_lock_irqsave(&domain->iommu_lock, flags2);
3436 if (test_and_clear_bit(iommu->seq_id,
3437 &domain->iommu_bmp)) {
3438 domain->iommu_count--;
58c610bd 3439 domain_update_iommu_cap(domain);
c7151a8d
WH
3440 }
3441 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3442
3443 free_devinfo_mem(info);
3444 spin_lock_irqsave(&device_domain_lock, flags1);
3445 }
3446 spin_unlock_irqrestore(&device_domain_lock, flags1);
3447}
3448
5e98c4b1
WH
 3449/* domain id for a virtual machine; it won't be set in a context entry */
3450static unsigned long vm_domid;
3451
fe40f1e0
WH
3452static int vm_domain_min_agaw(struct dmar_domain *domain)
3453{
3454 int i;
3455 int min_agaw = domain->agaw;
3456
3457 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
3458 for (; i < g_num_of_iommus; ) {
3459 if (min_agaw > g_iommus[i]->agaw)
3460 min_agaw = g_iommus[i]->agaw;
3461
3462 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
3463 }
3464
3465 return min_agaw;
3466}
3467
5e98c4b1
WH
3468static struct dmar_domain *iommu_alloc_vm_domain(void)
3469{
3470 struct dmar_domain *domain;
3471
3472 domain = alloc_domain_mem();
3473 if (!domain)
3474 return NULL;
3475
3476 domain->id = vm_domid++;
4c923d47 3477 domain->nid = -1;
5e98c4b1
WH
3478 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3479 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3480
3481 return domain;
3482}
3483
2c2e2c38 3484static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3485{
3486 int adjust_width;
3487
3488 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3489 spin_lock_init(&domain->iommu_lock);
3490
3491 domain_reserve_special_ranges(domain);
3492
3493 /* calculate AGAW */
3494 domain->gaw = guest_width;
3495 adjust_width = guestwidth_to_adjustwidth(guest_width);
3496 domain->agaw = width_to_agaw(adjust_width);
3497
3498 INIT_LIST_HEAD(&domain->devices);
3499
3500 domain->iommu_count = 0;
3501 domain->iommu_coherency = 0;
c5b15255 3502 domain->iommu_snooping = 0;
fe40f1e0 3503 domain->max_addr = 0;
4c923d47 3504 domain->nid = -1;
5e98c4b1
WH
3505
3506 /* always allocate the top pgd */
4c923d47 3507 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3508 if (!domain->pgd)
3509 return -ENOMEM;
3510 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3511 return 0;
3512}
3513
3514static void iommu_free_vm_domain(struct dmar_domain *domain)
3515{
3516 unsigned long flags;
3517 struct dmar_drhd_unit *drhd;
3518 struct intel_iommu *iommu;
3519 unsigned long i;
3520 unsigned long ndomains;
3521
3522 for_each_drhd_unit(drhd) {
3523 if (drhd->ignored)
3524 continue;
3525 iommu = drhd->iommu;
3526
3527 ndomains = cap_ndoms(iommu->cap);
3528 i = find_first_bit(iommu->domain_ids, ndomains);
3529 for (; i < ndomains; ) {
3530 if (iommu->domains[i] == domain) {
3531 spin_lock_irqsave(&iommu->lock, flags);
3532 clear_bit(i, iommu->domain_ids);
3533 iommu->domains[i] = NULL;
3534 spin_unlock_irqrestore(&iommu->lock, flags);
3535 break;
3536 }
3537 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
3538 }
3539 }
3540}
3541
3542static void vm_domain_exit(struct dmar_domain *domain)
3543{
5e98c4b1
WH
 3544 /* Domain 0 is reserved, so don't process it */
3545 if (!domain)
3546 return;
3547
3548 vm_domain_remove_all_dev_info(domain);
3549 /* destroy iovas */
3550 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3551
3552 /* clear ptes */
595badf5 3553 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3554
3555 /* free page tables */
d794dc9b 3556 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3557
3558 iommu_free_vm_domain(domain);
3559 free_domain_mem(domain);
3560}
3561
5d450806 3562static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3563{
5d450806 3564 struct dmar_domain *dmar_domain;
38717946 3565
5d450806
JR
3566 dmar_domain = iommu_alloc_vm_domain();
3567 if (!dmar_domain) {
38717946 3568 printk(KERN_ERR
5d450806
JR
3569 "intel_iommu_domain_init: dmar_domain == NULL\n");
3570 return -ENOMEM;
38717946 3571 }
2c2e2c38 3572 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3573 printk(KERN_ERR
5d450806
JR
3574 "intel_iommu_domain_init() failed\n");
3575 vm_domain_exit(dmar_domain);
3576 return -ENOMEM;
38717946 3577 }
5d450806 3578 domain->priv = dmar_domain;
faa3d6f5 3579
5d450806 3580 return 0;
38717946 3581}
38717946 3582
5d450806 3583static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3584{
5d450806
JR
3585 struct dmar_domain *dmar_domain = domain->priv;
3586
3587 domain->priv = NULL;
3588 vm_domain_exit(dmar_domain);
38717946 3589}
38717946 3590
4c5478c9
JR
3591static int intel_iommu_attach_device(struct iommu_domain *domain,
3592 struct device *dev)
38717946 3593{
4c5478c9
JR
3594 struct dmar_domain *dmar_domain = domain->priv;
3595 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3596 struct intel_iommu *iommu;
3597 int addr_width;
3598 u64 end;
faa3d6f5
WH
3599
3600 /* normally pdev is not mapped */
3601 if (unlikely(domain_context_mapped(pdev))) {
3602 struct dmar_domain *old_domain;
3603
3604 old_domain = find_domain(pdev);
3605 if (old_domain) {
2c2e2c38
FY
3606 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3607 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3608 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3609 else
3610 domain_remove_dev_info(old_domain);
3611 }
3612 }
3613
276dbf99
DW
3614 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3615 pdev->devfn);
fe40f1e0
WH
3616 if (!iommu)
3617 return -ENODEV;
3618
3619 /* check if this iommu agaw is sufficient for max mapped address */
3620 addr_width = agaw_to_width(iommu->agaw);
3621 end = DOMAIN_MAX_ADDR(addr_width);
3622 end = end & VTD_PAGE_MASK;
4c5478c9 3623 if (end < dmar_domain->max_addr) {
fe40f1e0
WH
3624 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3625 "sufficient for the mapped address (%llx)\n",
4c5478c9 3626 __func__, iommu->agaw, dmar_domain->max_addr);
fe40f1e0
WH
3627 return -EFAULT;
3628 }
3629
5fe60f4e 3630 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3631}
38717946 3632
4c5478c9
JR
3633static void intel_iommu_detach_device(struct iommu_domain *domain,
3634 struct device *dev)
38717946 3635{
4c5478c9
JR
3636 struct dmar_domain *dmar_domain = domain->priv;
3637 struct pci_dev *pdev = to_pci_dev(dev);
3638
2c2e2c38 3639 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3640}
c7151a8d 3641
dde57a21
JR
3642static int intel_iommu_map_range(struct iommu_domain *domain,
3643 unsigned long iova, phys_addr_t hpa,
3644 size_t size, int iommu_prot)
faa3d6f5 3645{
dde57a21 3646 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
3647 u64 max_addr;
3648 int addr_width;
dde57a21 3649 int prot = 0;
faa3d6f5 3650 int ret;
fe40f1e0 3651
dde57a21
JR
3652 if (iommu_prot & IOMMU_READ)
3653 prot |= DMA_PTE_READ;
3654 if (iommu_prot & IOMMU_WRITE)
3655 prot |= DMA_PTE_WRITE;
9cf06697
SY
3656 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3657 prot |= DMA_PTE_SNP;
dde57a21 3658
163cc52c 3659 max_addr = iova + size;
dde57a21 3660 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3661 int min_agaw;
3662 u64 end;
3663
3664 /* check if minimum agaw is sufficient for mapped address */
dde57a21 3665 min_agaw = vm_domain_min_agaw(dmar_domain);
fe40f1e0
WH
3666 addr_width = agaw_to_width(min_agaw);
3667 end = DOMAIN_MAX_ADDR(addr_width);
3668 end = end & VTD_PAGE_MASK;
3669 if (end < max_addr) {
3670 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3671 "sufficient for the mapped address (%llx)\n",
3672 __func__, min_agaw, max_addr);
3673 return -EFAULT;
3674 }
dde57a21 3675 dmar_domain->max_addr = max_addr;
fe40f1e0 3676 }
ad051221
DW
3677 /* Round up size to next multiple of PAGE_SIZE, if it and
3678 the low bits of hpa would take us onto the next page */
88cb6a74 3679 size = aligned_nrpages(hpa, size);
ad051221
DW
3680 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3681 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 3682 return ret;
38717946 3683}
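/*
 * Editor's note (worked example for the rounding above, assuming 4KiB VT-d
 * pages): hpa = 0x1ffc with size = 0x10 straddles a page boundary, so
 * aligned_nrpages() yields 2 and two pages are mapped; hpa = 0x2000 with
 * size = 0x1000 stays within one page and maps exactly 1.
 */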
38717946 3684
dde57a21
JR
3685static void intel_iommu_unmap_range(struct iommu_domain *domain,
3686 unsigned long iova, size_t size)
38717946 3687{
dde57a21 3688 struct dmar_domain *dmar_domain = domain->priv;
faa3d6f5 3689
4b99d352
SY
3690 if (!size)
3691 return;
3692
163cc52c
DW
3693 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3694 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 3695
163cc52c
DW
3696 if (dmar_domain->max_addr == iova + size)
3697 dmar_domain->max_addr = iova;
38717946 3698}
38717946 3699
d14d6577
JR
3700static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3701 unsigned long iova)
38717946 3702{
d14d6577 3703 struct dmar_domain *dmar_domain = domain->priv;
38717946 3704 struct dma_pte *pte;
faa3d6f5 3705 u64 phys = 0;
38717946 3706
b026fd28 3707 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
38717946 3708 if (pte)
faa3d6f5 3709 phys = dma_pte_addr(pte);
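/* Editor's note: dma_pte_addr() returns the page-aligned address held
   in the PTE, so the value returned below has the low VTD_PAGE_SHIFT
   bits clear; a caller that wants a byte address must add the offset
   within the page itself. */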
38717946 3710
faa3d6f5 3711 return phys;
38717946 3712}
a8bcbb0d 3713
dbb9fd86
SY
3714static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3715 unsigned long cap)
3716{
3717 struct dmar_domain *dmar_domain = domain->priv;
3718
3719 if (cap == IOMMU_CAP_CACHE_COHERENCY)
3720 return dmar_domain->iommu_snooping;
3721
3722 return 0;
3723}
3724
a8bcbb0d
JR
3725static struct iommu_ops intel_iommu_ops = {
3726 .domain_init = intel_iommu_domain_init,
3727 .domain_destroy = intel_iommu_domain_destroy,
3728 .attach_dev = intel_iommu_attach_device,
3729 .detach_dev = intel_iommu_detach_device,
3730 .map = intel_iommu_map_range,
3731 .unmap = intel_iommu_unmap_range,
3732 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 3733 .domain_has_cap = intel_iommu_domain_has_cap,
a8bcbb0d 3734};
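/*
 * Editor's sketch, not part of the original file: a minimal example of
 * how a caller (KVM device assignment, for instance) might drive the
 * ops table above through the generic <linux/iommu.h> wrappers of this
 * era -- iommu_domain_alloc(), iommu_attach_device(),
 * iommu_domain_has_cap() and iommu_map_range().  The function and
 * parameter names are hypothetical and error handling is abbreviated.
 */
#if 0
static int example_assign_device(struct pci_dev *pdev,
				 unsigned long iova, phys_addr_t hpa,
				 size_t size)
{
	struct iommu_domain *domain;
	int prot = IOMMU_READ | IOMMU_WRITE;
	int ret;

	domain = iommu_domain_alloc();		/* -> intel_iommu_domain_init() */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, &pdev->dev);	/* -> intel_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* Ask for snooped mappings only if the hardware can provide them. */
	if (iommu_domain_has_cap(domain, IOMMU_CAP_CACHE_COHERENCY))
		prot |= IOMMU_CACHE;	/* becomes DMA_PTE_SNP in intel_iommu_map_range() */

	ret = iommu_map_range(domain, iova, hpa, size, prot);	/* -> intel_iommu_map_range() */
	if (ret)
		goto out_detach;

	return 0;

out_detach:
	iommu_detach_device(domain, &pdev->dev);	/* -> intel_iommu_detach_device() */
out_free:
	iommu_domain_free(domain);			/* -> intel_iommu_domain_destroy() */
	return ret;
}
#endif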
9af88143
DW
3735
3736static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3737{
3738 /*
3739 * Mobile 4 Series Chipset neglects to set RWBF capability,
3740 * but needs it:
3741 */
3742 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3743 rwbf_quirk = 1;
3744}
3745
3746DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
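/* Editor's note: header fixups such as the one above run while the PCI
   core is first parsing the device's config header, long before any
   driver binds, so rwbf_quirk should already be set by the time the
   DMAR code consults it.  Other chipsets needing the same workaround
   would be listed with additional DECLARE_PCI_FIXUP_HEADER() lines for
   their device IDs. */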
e0fc7e0b
DW
3747
3748/* On Tylersburg chipsets, some BIOSes have been known to enable the
3749 ISOCH DMAR unit for the Azalia sound device, but not give it any
3750 TLB entries, which causes it to deadlock. Check for that. We do
3751 this in a function called from init_dmars(), instead of in a PCI
3752 quirk, because we don't want to print the obnoxious "BIOS broken"
3753 message if VT-d is actually disabled.
3754*/
3755static void __init check_tylersburg_isoch(void)
3756{
3757 struct pci_dev *pdev;
3758 uint32_t vtisochctrl;
3759
3760 /* If there's no Azalia in the system anyway, forget it. */
3761 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3762 if (!pdev)
3763 return;
3764 pci_dev_put(pdev);
3765
3766 /* System Management Registers. Might be hidden, in which case
3767 we can't do the sanity check. But that's OK, because the
3768 known-broken BIOSes _don't_ actually hide it, so far. */
3769 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3770 if (!pdev)
3771 return;
3772
3773 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3774 pci_dev_put(pdev);
3775 return;
3776 }
3777
3778 pci_dev_put(pdev);
3779
3780 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3781 if (vtisochctrl & 1)
3782 return;
3783
3784 /* Drop all bits other than the number of TLB entries */
3785 vtisochctrl &= 0x1c;
3786
3787 /* If we have the recommended number of TLB entries (16), fine. */
3788 if (vtisochctrl == 0x10)
3789 return;
3790
3791 /* Zero TLB entries? You get to ride the short bus to school. */
3792 if (!vtisochctrl) {
3793 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
3794 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3795 dmi_get_system_info(DMI_BIOS_VENDOR),
3796 dmi_get_system_info(DMI_BIOS_VERSION),
3797 dmi_get_system_info(DMI_PRODUCT_VERSION));
3798 iommu_identity_mapping |= IDENTMAP_AZALIA;
3799 return;
3800 }
3801
3802 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
3803 vtisochctrl);
3804}
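/* Editor's note, summarising the checks above: nothing is done if the
   Azalia device is absent, if its DMA is routed to the non-isoch DMAR
   unit, or if the isoch unit has the recommended 16 TLB entries.  A
   BIOS that allocated zero entries gets a WARN and the Azalia device
   is forced into the identity map (IDENTMAP_AZALIA); any other entry
   count only draws the warning printk. */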