drivers/pci/intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
38717946 36#include <linux/iova.h>
5d450806 37#include <linux/iommu.h>
38717946 38#include <linux/intel-iommu.h>
134fac3f 39#include <linux/syscore_ops.h>
69575d38 40#include <linux/tboot.h>
adb2fe02 41#include <linux/dmi.h>
ba395927 42#include <asm/cacheflush.h>
46a7fa27 43#include <asm/iommu.h>
ba395927
KA
44#include "pci.h"
45
5b6985ce
FY
46#define ROOT_SIZE VTD_PAGE_SIZE
47#define CONTEXT_SIZE VTD_PAGE_SIZE
48
ba395927
KA
49#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
50#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 51#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
52
53#define IOAPIC_RANGE_START (0xfee00000)
54#define IOAPIC_RANGE_END (0xfeefffff)
55#define IOVA_START_ADDR (0x1000)
56
57#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
58
4ed0d3e6
FY
59#define MAX_AGAW_WIDTH 64
60
2ebe3151
DW
61#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
62#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
63
64/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
65 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
66#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
67 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
68#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 69
f27be03b 70#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 71#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 72#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 73
df08cdc7
AM
74/* page table handling */
75#define LEVEL_STRIDE (9)
76#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
77
78static inline int agaw_to_level(int agaw)
79{
80 return agaw + 2;
81}
82
83static inline int agaw_to_width(int agaw)
84{
85 return 30 + agaw * LEVEL_STRIDE;
86}
87
88static inline int width_to_agaw(int width)
89{
90 return (width - 30) / LEVEL_STRIDE;
91}
92
93static inline unsigned int level_to_offset_bits(int level)
94{
95 return (level - 1) * LEVEL_STRIDE;
96}
97
98static inline int pfn_level_offset(unsigned long pfn, int level)
99{
100 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
101}
102
103static inline unsigned long level_mask(int level)
104{
105 return -1UL << level_to_offset_bits(level);
106}
107
108static inline unsigned long level_size(int level)
109{
110 return 1UL << level_to_offset_bits(level);
111}
112
113static inline unsigned long align_to_level(unsigned long pfn, int level)
114{
115 return (pfn + level_size(level) - 1) & level_mask(level);
116}
fd18de50 117
dd4e8319
DW
118/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
119 are never going to work. */
120static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
121{
122 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
123}
124
125static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
126{
127 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
128}
129static inline unsigned long page_to_dma_pfn(struct page *pg)
130{
131 return mm_to_dma_pfn(page_to_pfn(pg));
132}
133static inline unsigned long virt_to_dma_pfn(void *p)
134{
135 return page_to_dma_pfn(virt_to_page(p));
136}
137
d9630fe9
WH
138/* global iommu list, set NULL for ignored DMAR units */
139static struct intel_iommu **g_iommus;
140
e0fc7e0b 141static void __init check_tylersburg_isoch(void);
9af88143
DW
142static int rwbf_quirk;
143
46b08e1a
MM
144/*
145 * 0: Present
146 * 1-11: Reserved
147 * 12-63: Context Ptr (12 - (haw-1))
148 * 64-127: Reserved
149 */
150struct root_entry {
151 u64 val;
152 u64 rsvd1;
153};
154#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
155static inline bool root_present(struct root_entry *root)
156{
157 return (root->val & 1);
158}
159static inline void set_root_present(struct root_entry *root)
160{
161 root->val |= 1;
162}
163static inline void set_root_value(struct root_entry *root, unsigned long value)
164{
165 root->val |= value & VTD_PAGE_MASK;
166}
167
168static inline struct context_entry *
169get_context_addr_from_root(struct root_entry *root)
170{
171 return (struct context_entry *)
172 (root_present(root)?phys_to_virt(
173 root->val & VTD_PAGE_MASK) :
174 NULL);
175}
176
7a8fc25e
MM
177/*
178 * low 64 bits:
179 * 0: present
180 * 1: fault processing disable
181 * 2-3: translation type
182 * 12-63: address space root
183 * high 64 bits:
184 * 0-2: address width
185 * 3-6: aval
186 * 8-23: domain id
187 */
188struct context_entry {
189 u64 lo;
190 u64 hi;
191};
c07e7d21
MM
192
193static inline bool context_present(struct context_entry *context)
194{
195 return (context->lo & 1);
196}
197static inline void context_set_present(struct context_entry *context)
198{
199 context->lo |= 1;
200}
201
202static inline void context_set_fault_enable(struct context_entry *context)
203{
204 context->lo &= (((u64)-1) << 2) | 1;
205}
206
c07e7d21
MM
207static inline void context_set_translation_type(struct context_entry *context,
208 unsigned long value)
209{
210 context->lo &= (((u64)-1) << 4) | 3;
211 context->lo |= (value & 3) << 2;
212}
213
214static inline void context_set_address_root(struct context_entry *context,
215 unsigned long value)
216{
217 context->lo |= value & VTD_PAGE_MASK;
218}
219
220static inline void context_set_address_width(struct context_entry *context,
221 unsigned long value)
222{
223 context->hi |= value & 7;
224}
225
226static inline void context_set_domain_id(struct context_entry *context,
227 unsigned long value)
228{
229 context->hi |= (value & ((1 << 16) - 1)) << 8;
230}
231
232static inline void context_clear_entry(struct context_entry *context)
233{
234 context->lo = 0;
235 context->hi = 0;
236}
7a8fc25e 237
622ba12a
MM
238/*
239 * 0: readable
240 * 1: writable
241 * 2-6: reserved
242 * 7: super page
9cf06697
SY
243 * 8-10: available
244 * 11: snoop behavior
622ba12a
MM
245 * 12-63: Host physical address
246 */
247struct dma_pte {
248 u64 val;
249};
622ba12a 250
19c239ce
MM
251static inline void dma_clear_pte(struct dma_pte *pte)
252{
253 pte->val = 0;
254}
255
256static inline void dma_set_pte_readable(struct dma_pte *pte)
257{
258 pte->val |= DMA_PTE_READ;
259}
260
261static inline void dma_set_pte_writable(struct dma_pte *pte)
262{
263 pte->val |= DMA_PTE_WRITE;
264}
265
9cf06697
SY
266static inline void dma_set_pte_snp(struct dma_pte *pte)
267{
268 pte->val |= DMA_PTE_SNP;
269}
270
19c239ce
MM
271static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
272{
273 pte->val = (pte->val & ~3) | (prot & 3);
274}
275
276static inline u64 dma_pte_addr(struct dma_pte *pte)
277{
c85994e4
DW
278#ifdef CONFIG_64BIT
279 return pte->val & VTD_PAGE_MASK;
280#else
281 /* Must have a full atomic 64-bit read */
1a8bd481 282 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 283#endif
19c239ce
MM
284}
285
dd4e8319 286static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
19c239ce 287{
dd4e8319 288 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
19c239ce
MM
289}
290
291static inline bool dma_pte_present(struct dma_pte *pte)
292{
293 return (pte->val & 3) != 0;
294}
622ba12a 295
75e6bf96
DW
296static inline int first_pte_in_page(struct dma_pte *pte)
297{
298 return !((unsigned long)pte & ~VTD_PAGE_MASK);
299}
300
2c2e2c38
FY
301/*
302 * This domain is a static identity mapping domain.
303 * 1. This domain creates a static 1:1 mapping to all usable memory.
304 * 2. It maps to each iommu if successful.
305 * 3. Each iommu maps to this domain if successful.
306 */
19943b0e
DW
307static struct dmar_domain *si_domain;
308static int hw_pass_through = 1;
2c2e2c38 309
3b5410e7 310/* devices under the same p2p bridge are owned in one domain */
cdc7b837 311#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 312
1ce28feb
WH
313/* domain represents a virtual machine; more than one device
314 * across iommus may be owned by one domain, e.g. a kvm guest.
315 */
316#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
317
2c2e2c38
FY
318/* si_domain contains multiple devices */
319#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
320
99126f7c
MM
321struct dmar_domain {
322 int id; /* domain id */
4c923d47 323 int nid; /* node id */
8c11e798 324 unsigned long iommu_bmp; /* bitmap of iommus this domain uses */
99126f7c
MM
325
326 struct list_head devices; /* all devices' list */
327 struct iova_domain iovad; /* iova's that belong to this domain */
328
329 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
330 int gaw; /* max guest address width */
331
332 /* adjusted guest address width, 0 is level 2 30-bit */
333 int agaw;
334
3b5410e7 335 int flags; /* flags to find out type of domain */
8e604097
WH
336
337 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 338 int iommu_snooping; /* indicate snooping control feature */
c7151a8d
WH
339 int iommu_count; /* reference count of iommu */
340 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 341 u64 max_addr; /* maximum mapped address */
99126f7c
MM
342};
343
a647dacb
MM
344/* PCI domain-device relationship */
345struct device_domain_info {
346 struct list_head link; /* link to domain siblings */
347 struct list_head global; /* link to global list */
276dbf99
DW
348 int segment; /* PCI domain */
349 u8 bus; /* PCI bus number */
a647dacb 350 u8 devfn; /* PCI devfn number */
45e829ea 351 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 352 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
353 struct dmar_domain *domain; /* pointer to domain */
354};
355
5e0d2a6f 356static void flush_unmaps_timeout(unsigned long data);
357
358DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
359
80b20dd8 360#define HIGH_WATER_MARK 250
361struct deferred_flush_tables {
362 int next;
363 struct iova *iova[HIGH_WATER_MARK];
364 struct dmar_domain *domain[HIGH_WATER_MARK];
365};
366
367static struct deferred_flush_tables *deferred_flush;
368
5e0d2a6f 369/* bitmap for indexing intel_iommus */
5e0d2a6f 370static int g_num_of_iommus;
371
372static DEFINE_SPINLOCK(async_umap_flush_lock);
373static LIST_HEAD(unmaps_to_do);
374
375static int timer_on;
376static long list_size;
5e0d2a6f 377
ba395927
KA
378static void domain_remove_dev_info(struct dmar_domain *domain);
379
0cd5c3c8
KM
380#ifdef CONFIG_DMAR_DEFAULT_ON
381int dmar_disabled = 0;
382#else
383int dmar_disabled = 1;
384#endif /*CONFIG_DMAR_DEFAULT_ON*/
385
2d9e667e 386static int dmar_map_gfx = 1;
7d3b03ce 387static int dmar_forcedac;
5e0d2a6f 388static int intel_iommu_strict;
ba395927
KA
389
390#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
391static DEFINE_SPINLOCK(device_domain_lock);
392static LIST_HEAD(device_domain_list);
393
a8bcbb0d
JR
394static struct iommu_ops intel_iommu_ops;
395
ba395927
KA
396static int __init intel_iommu_setup(char *str)
397{
398 if (!str)
399 return -EINVAL;
400 while (*str) {
0cd5c3c8
KM
401 if (!strncmp(str, "on", 2)) {
402 dmar_disabled = 0;
403 printk(KERN_INFO "Intel-IOMMU: enabled\n");
404 } else if (!strncmp(str, "off", 3)) {
ba395927 405 dmar_disabled = 1;
0cd5c3c8 406 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
407 } else if (!strncmp(str, "igfx_off", 8)) {
408 dmar_map_gfx = 0;
409 printk(KERN_INFO
410 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 411 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 412 printk(KERN_INFO
7d3b03ce
KA
413 "Intel-IOMMU: Forcing DAC for PCI devices\n");
414 dmar_forcedac = 1;
5e0d2a6f 415 } else if (!strncmp(str, "strict", 6)) {
416 printk(KERN_INFO
417 "Intel-IOMMU: disable batched IOTLB flush\n");
418 intel_iommu_strict = 1;
ba395927
KA
419 }
420
421 str += strcspn(str, ",");
422 while (*str == ',')
423 str++;
424 }
425 return 0;
426}
427__setup("intel_iommu=", intel_iommu_setup);
428
429static struct kmem_cache *iommu_domain_cache;
430static struct kmem_cache *iommu_devinfo_cache;
431static struct kmem_cache *iommu_iova_cache;
432
4c923d47 433static inline void *alloc_pgtable_page(int node)
eb3fa7cb 434{
4c923d47
SS
435 struct page *page;
436 void *vaddr = NULL;
eb3fa7cb 437
4c923d47
SS
438 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
439 if (page)
440 vaddr = page_address(page);
eb3fa7cb 441 return vaddr;
ba395927
KA
442}
443
444static inline void free_pgtable_page(void *vaddr)
445{
446 free_page((unsigned long)vaddr);
447}
448
449static inline void *alloc_domain_mem(void)
450{
354bb65e 451 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
452}
453
38717946 454static void free_domain_mem(void *vaddr)
ba395927
KA
455{
456 kmem_cache_free(iommu_domain_cache, vaddr);
457}
458
459static inline void * alloc_devinfo_mem(void)
460{
354bb65e 461 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
462}
463
464static inline void free_devinfo_mem(void *vaddr)
465{
466 kmem_cache_free(iommu_devinfo_cache, vaddr);
467}
468
469struct iova *alloc_iova_mem(void)
470{
354bb65e 471 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
472}
473
474void free_iova_mem(struct iova *iova)
475{
476 kmem_cache_free(iommu_iova_cache, iova);
477}
478
1b573683 479
4ed0d3e6 480static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
481{
482 unsigned long sagaw;
483 int agaw = -1;
484
485 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 486 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
487 agaw >= 0; agaw--) {
488 if (test_bit(agaw, &sagaw))
489 break;
490 }
491
492 return agaw;
493}
494
4ed0d3e6
FY
495/*
496 * Calculate max SAGAW for each iommu.
497 */
498int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
499{
500 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
501}
502
503/*
504 * Calculate agaw for each iommu.
505 * "SAGAW" may be different across iommus; use a default agaw, and
506 * get a smaller supported agaw for iommus that don't support the default agaw.
507 */
508int iommu_calculate_agaw(struct intel_iommu *iommu)
509{
510 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
511}
512
2c2e2c38 513/* This function only returns a single iommu in a domain */
8c11e798
WH
514static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
515{
516 int iommu_id;
517
2c2e2c38 518 /* si_domain and vm domain should not get here. */
1ce28feb 519 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 520 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 521
8c11e798
WH
522 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
523 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
524 return NULL;
525
526 return g_iommus[iommu_id];
527}
528
8e604097
WH
529static void domain_update_iommu_coherency(struct dmar_domain *domain)
530{
531 int i;
532
533 domain->iommu_coherency = 1;
534
a45946ab 535 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
8e604097
WH
536 if (!ecap_coherent(g_iommus[i]->ecap)) {
537 domain->iommu_coherency = 0;
538 break;
539 }
8e604097
WH
540 }
541}
542
58c610bd
SY
543static void domain_update_iommu_snooping(struct dmar_domain *domain)
544{
545 int i;
546
547 domain->iommu_snooping = 1;
548
a45946ab 549 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
550 if (!ecap_sc_support(g_iommus[i]->ecap)) {
551 domain->iommu_snooping = 0;
552 break;
553 }
58c610bd
SY
554 }
555}
556
557/* Some capabilities may be different across iommus */
558static void domain_update_iommu_cap(struct dmar_domain *domain)
559{
560 domain_update_iommu_coherency(domain);
561 domain_update_iommu_snooping(domain);
562}
563
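/*
 * Find the IOMMU (DRHD unit) whose scope covers the given PCI
 * segment:bus:devfn, including devices behind listed bridges and
 * catch-all (include_all) units.
 */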
276dbf99 564static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
c7151a8d
WH
565{
566 struct dmar_drhd_unit *drhd = NULL;
567 int i;
568
569 for_each_drhd_unit(drhd) {
570 if (drhd->ignored)
571 continue;
276dbf99
DW
572 if (segment != drhd->segment)
573 continue;
c7151a8d 574
924b6231 575 for (i = 0; i < drhd->devices_cnt; i++) {
288e4877
DH
576 if (drhd->devices[i] &&
577 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
578 drhd->devices[i]->devfn == devfn)
579 return drhd->iommu;
4958c5dc
DW
580 if (drhd->devices[i] &&
581 drhd->devices[i]->subordinate &&
924b6231
DW
582 drhd->devices[i]->subordinate->number <= bus &&
583 drhd->devices[i]->subordinate->subordinate >= bus)
584 return drhd->iommu;
585 }
c7151a8d
WH
586
587 if (drhd->include_all)
588 return drhd->iommu;
589 }
590
591 return NULL;
592}
593
5331fe6f
WH
594static void domain_flush_cache(struct dmar_domain *domain,
595 void *addr, int size)
596{
597 if (!domain->iommu_coherency)
598 clflush_cache_range(addr, size);
599}
600
ba395927
KA
601/* Gets context entry for a given bus and devfn */
602static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
603 u8 bus, u8 devfn)
604{
605 struct root_entry *root;
606 struct context_entry *context;
607 unsigned long phy_addr;
608 unsigned long flags;
609
610 spin_lock_irqsave(&iommu->lock, flags);
611 root = &iommu->root_entry[bus];
612 context = get_context_addr_from_root(root);
613 if (!context) {
4c923d47
SS
614 context = (struct context_entry *)
615 alloc_pgtable_page(iommu->node);
ba395927
KA
616 if (!context) {
617 spin_unlock_irqrestore(&iommu->lock, flags);
618 return NULL;
619 }
5b6985ce 620 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
621 phy_addr = virt_to_phys((void *)context);
622 set_root_value(root, phy_addr);
623 set_root_present(root);
624 __iommu_flush_cache(iommu, root, sizeof(*root));
625 }
626 spin_unlock_irqrestore(&iommu->lock, flags);
627 return &context[devfn];
628}
629
630static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
631{
632 struct root_entry *root;
633 struct context_entry *context;
634 int ret;
635 unsigned long flags;
636
637 spin_lock_irqsave(&iommu->lock, flags);
638 root = &iommu->root_entry[bus];
639 context = get_context_addr_from_root(root);
640 if (!context) {
641 ret = 0;
642 goto out;
643 }
c07e7d21 644 ret = context_present(&context[devfn]);
ba395927
KA
645out:
646 spin_unlock_irqrestore(&iommu->lock, flags);
647 return ret;
648}
649
650static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
651{
652 struct root_entry *root;
653 struct context_entry *context;
654 unsigned long flags;
655
656 spin_lock_irqsave(&iommu->lock, flags);
657 root = &iommu->root_entry[bus];
658 context = get_context_addr_from_root(root);
659 if (context) {
c07e7d21 660 context_clear_entry(&context[devfn]);
ba395927
KA
661 __iommu_flush_cache(iommu, &context[devfn], \
662 sizeof(*context));
663 }
664 spin_unlock_irqrestore(&iommu->lock, flags);
665}
666
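/* Free every context table hanging off the root entries, then the root table itself */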
667static void free_context_table(struct intel_iommu *iommu)
668{
669 struct root_entry *root;
670 int i;
671 unsigned long flags;
672 struct context_entry *context;
673
674 spin_lock_irqsave(&iommu->lock, flags);
675 if (!iommu->root_entry) {
676 goto out;
677 }
678 for (i = 0; i < ROOT_ENTRY_NR; i++) {
679 root = &iommu->root_entry[i];
680 context = get_context_addr_from_root(root);
681 if (context)
682 free_pgtable_page(context);
683 }
684 free_pgtable_page(iommu->root_entry);
685 iommu->root_entry = NULL;
686out:
687 spin_unlock_irqrestore(&iommu->lock, flags);
688}
689
b026fd28
DW
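/*
 * Walk the page table for @pfn, allocating any missing intermediate
 * levels, and return the leaf (level 1) PTE, or NULL if an allocation
 * fails.
 */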
690static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
691 unsigned long pfn)
ba395927 692{
b026fd28 693 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
694 struct dma_pte *parent, *pte = NULL;
695 int level = agaw_to_level(domain->agaw);
696 int offset;
ba395927
KA
697
698 BUG_ON(!domain->pgd);
b026fd28 699 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
ba395927
KA
700 parent = domain->pgd;
701
ba395927
KA
702 while (level > 0) {
703 void *tmp_page;
704
b026fd28 705 offset = pfn_level_offset(pfn, level);
ba395927
KA
706 pte = &parent[offset];
707 if (level == 1)
708 break;
709
19c239ce 710 if (!dma_pte_present(pte)) {
c85994e4
DW
711 uint64_t pteval;
712
4c923d47 713 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 714
206a73c1 715 if (!tmp_page)
ba395927 716 return NULL;
206a73c1 717
c85994e4 718 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 719 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
720 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
721 /* Someone else set it while we were thinking; use theirs. */
722 free_pgtable_page(tmp_page);
723 } else {
724 dma_pte_addr(pte);
725 domain_flush_cache(domain, pte, sizeof(*pte));
726 }
ba395927 727 }
19c239ce 728 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
729 level--;
730 }
731
ba395927
KA
732 return pte;
733}
734
735/* return address's pte at specific level */
90dcfb5e
DW
736static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
737 unsigned long pfn,
738 int level)
ba395927
KA
739{
740 struct dma_pte *parent, *pte = NULL;
741 int total = agaw_to_level(domain->agaw);
742 int offset;
743
744 parent = domain->pgd;
745 while (level <= total) {
90dcfb5e 746 offset = pfn_level_offset(pfn, total);
ba395927
KA
747 pte = &parent[offset];
748 if (level == total)
749 return pte;
750
19c239ce 751 if (!dma_pte_present(pte))
ba395927 752 break;
19c239ce 753 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
754 total--;
755 }
756 return NULL;
757}
758
ba395927 759/* clear last level pte; should be followed by a tlb flush */
595badf5
DW
760static void dma_pte_clear_range(struct dmar_domain *domain,
761 unsigned long start_pfn,
762 unsigned long last_pfn)
ba395927 763{
04b18e65 764 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
310a5ab9 765 struct dma_pte *first_pte, *pte;
66eae846 766
04b18e65 767 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 768 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 769 BUG_ON(start_pfn > last_pfn);
ba395927 770
04b18e65 771 /* we don't need lock here; nobody else touches the iova range */
59c36286 772 do {
310a5ab9
DW
773 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
774 if (!pte) {
775 start_pfn = align_to_level(start_pfn + 1, 2);
776 continue;
777 }
75e6bf96 778 do {
310a5ab9
DW
779 dma_clear_pte(pte);
780 start_pfn++;
781 pte++;
75e6bf96
DW
782 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
783
310a5ab9
DW
784 domain_flush_cache(domain, first_pte,
785 (void *)pte - (void *)first_pte);
59c36286
DW
786
787 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
788}
789
790/* free page table pages. last level pte should already be cleared */
791static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
792 unsigned long start_pfn,
793 unsigned long last_pfn)
ba395927 794{
6660c63a 795 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
f3a0a52f 796 struct dma_pte *first_pte, *pte;
ba395927
KA
797 int total = agaw_to_level(domain->agaw);
798 int level;
6660c63a 799 unsigned long tmp;
ba395927 800
6660c63a
DW
801 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
802 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 803 BUG_ON(start_pfn > last_pfn);
ba395927 804
f3a0a52f 805 /* We don't need lock here; nobody else touches the iova range */
ba395927
KA
806 level = 2;
807 while (level <= total) {
6660c63a
DW
808 tmp = align_to_level(start_pfn, level);
809
f3a0a52f 810 /* If we can't even clear one PTE at this level, we're done */
6660c63a 811 if (tmp + level_size(level) - 1 > last_pfn)
ba395927
KA
812 return;
813
59c36286 814 do {
f3a0a52f
DW
815 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
816 if (!pte) {
817 tmp = align_to_level(tmp + 1, level + 1);
818 continue;
819 }
75e6bf96 820 do {
6a43e574
DW
821 if (dma_pte_present(pte)) {
822 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
823 dma_clear_pte(pte);
824 }
f3a0a52f
DW
825 pte++;
826 tmp += level_size(level);
75e6bf96
DW
827 } while (!first_pte_in_page(pte) &&
828 tmp + level_size(level) - 1 <= last_pfn);
829
f3a0a52f
DW
830 domain_flush_cache(domain, first_pte,
831 (void *)pte - (void *)first_pte);
832
59c36286 833 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
ba395927
KA
834 level++;
835 }
836 /* free pgd */
d794dc9b 837 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
838 free_pgtable_page(domain->pgd);
839 domain->pgd = NULL;
840 }
841}
842
843/* iommu handling */
844static int iommu_alloc_root_entry(struct intel_iommu *iommu)
845{
846 struct root_entry *root;
847 unsigned long flags;
848
4c923d47 849 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
850 if (!root)
851 return -ENOMEM;
852
5b6985ce 853 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
854
855 spin_lock_irqsave(&iommu->lock, flags);
856 iommu->root_entry = root;
857 spin_unlock_irqrestore(&iommu->lock, flags);
858
859 return 0;
860}
861
ba395927
KA
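/* Program the root table address into the hardware (SRTP) and wait for it to take effect */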
862static void iommu_set_root_entry(struct intel_iommu *iommu)
863{
864 void *addr;
c416daa9 865 u32 sts;
ba395927
KA
866 unsigned long flag;
867
868 addr = iommu->root_entry;
869
870 spin_lock_irqsave(&iommu->register_lock, flag);
871 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
872
c416daa9 873 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
874
875 /* Make sure hardware complete it */
876 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 877 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927
KA
878
879 spin_unlock_irqrestore(&iommu->register_lock, flag);
880}
881
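/* Flush the hardware write buffer if the capability (or the rwbf quirk) requires it */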
882static void iommu_flush_write_buffer(struct intel_iommu *iommu)
883{
884 u32 val;
885 unsigned long flag;
886
9af88143 887 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 888 return;
ba395927
KA
889
890 spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 891 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
892
893 /* Make sure hardware complete it */
894 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 895 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927
KA
896
897 spin_unlock_irqrestore(&iommu->register_lock, flag);
898}
899
900/* return value determines if we need a write buffer flush */
4c25a2c1
DW
901static void __iommu_flush_context(struct intel_iommu *iommu,
902 u16 did, u16 source_id, u8 function_mask,
903 u64 type)
ba395927
KA
904{
905 u64 val = 0;
906 unsigned long flag;
907
ba395927
KA
908 switch (type) {
909 case DMA_CCMD_GLOBAL_INVL:
910 val = DMA_CCMD_GLOBAL_INVL;
911 break;
912 case DMA_CCMD_DOMAIN_INVL:
913 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
914 break;
915 case DMA_CCMD_DEVICE_INVL:
916 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
917 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
918 break;
919 default:
920 BUG();
921 }
922 val |= DMA_CCMD_ICC;
923
924 spin_lock_irqsave(&iommu->register_lock, flag);
925 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
926
927 /* Make sure hardware complete it */
928 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
929 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
930
931 spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
932}
933
ba395927 934/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
935static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
936 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
937{
938 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
939 u64 val = 0, val_iva = 0;
940 unsigned long flag;
941
ba395927
KA
942 switch (type) {
943 case DMA_TLB_GLOBAL_FLUSH:
944 /* global flush doesn't need set IVA_REG */
945 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
946 break;
947 case DMA_TLB_DSI_FLUSH:
948 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
949 break;
950 case DMA_TLB_PSI_FLUSH:
951 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
952 /* Note: always flush non-leaf currently */
953 val_iva = size_order | addr;
954 break;
955 default:
956 BUG();
957 }
958 /* Note: set drain read/write */
959#if 0
960 /*
961 * This is probably meant to be extra secure. Looks like we can
962 * ignore it without any impact.
963 */
964 if (cap_read_drain(iommu->cap))
965 val |= DMA_TLB_READ_DRAIN;
966#endif
967 if (cap_write_drain(iommu->cap))
968 val |= DMA_TLB_WRITE_DRAIN;
969
970 spin_lock_irqsave(&iommu->register_lock, flag);
971 /* Note: Only uses first TLB reg currently */
972 if (val_iva)
973 dmar_writeq(iommu->reg + tlb_offset, val_iva);
974 dmar_writeq(iommu->reg + tlb_offset + 8, val);
975
976 /* Make sure hardware complete it */
977 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
978 dmar_readq, (!(val & DMA_TLB_IVT)), val);
979
980 spin_unlock_irqrestore(&iommu->register_lock, flag);
981
982 /* check IOTLB invalidation granularity */
983 if (DMA_TLB_IAIG(val) == 0)
984 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
985 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
986 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
987 (unsigned long long)DMA_TLB_IIRG(type),
988 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
989}
990
93a23a72
YZ
991static struct device_domain_info *iommu_support_dev_iotlb(
992 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
993{
994 int found = 0;
995 unsigned long flags;
996 struct device_domain_info *info;
997 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
998
999 if (!ecap_dev_iotlb_support(iommu->ecap))
1000 return NULL;
1001
1002 if (!iommu->qi)
1003 return NULL;
1004
1005 spin_lock_irqsave(&device_domain_lock, flags);
1006 list_for_each_entry(info, &domain->devices, link)
1007 if (info->bus == bus && info->devfn == devfn) {
1008 found = 1;
1009 break;
1010 }
1011 spin_unlock_irqrestore(&device_domain_lock, flags);
1012
1013 if (!found || !info->dev)
1014 return NULL;
1015
1016 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1017 return NULL;
1018
1019 if (!dmar_find_matched_atsr_unit(info->dev))
1020 return NULL;
1021
1022 info->iommu = iommu;
1023
1024 return info;
1025}
1026
1027static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1028{
93a23a72
YZ
1029 if (!info)
1030 return;
1031
1032 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1033}
1034
1035static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1036{
1037 if (!info->dev || !pci_ats_enabled(info->dev))
1038 return;
1039
1040 pci_disable_ats(info->dev);
1041}
1042
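/* Invalidate the device IOTLB (ATS) of every device in the domain that has ATS enabled */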
1043static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1044 u64 addr, unsigned mask)
1045{
1046 u16 sid, qdep;
1047 unsigned long flags;
1048 struct device_domain_info *info;
1049
1050 spin_lock_irqsave(&device_domain_lock, flags);
1051 list_for_each_entry(info, &domain->devices, link) {
1052 if (!info->dev || !pci_ats_enabled(info->dev))
1053 continue;
1054
1055 sid = info->bus << 8 | info->devfn;
1056 qdep = pci_ats_queue_depth(info->dev);
1057 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1058 }
1059 spin_unlock_irqrestore(&device_domain_lock, flags);
1060}
1061
1f0ef2aa 1062static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
82653633 1063 unsigned long pfn, unsigned int pages, int map)
ba395927 1064{
9dd2fe89 1065 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1066 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1067
ba395927
KA
1068 BUG_ON(pages == 0);
1069
ba395927 1070 /*
9dd2fe89
YZ
1071 * Fall back to domain selective flush if no PSI support or the size is
1072 * too big.
ba395927
KA
1073 * PSI requires page size to be 2 ^ x, and the base address is naturally
1074 * aligned to the size
1075 */
9dd2fe89
YZ
1076 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1077 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1078 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1079 else
1080 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1081 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1082
1083 /*
82653633
NA
1084 * In caching mode, changes of pages from non-present to present require
1085 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
bf92df30 1086 */
82653633 1087 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1088 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1089}
1090
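/* Disable the protected memory regions and wait for the protected region status bit to clear */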
f8bab735 1091static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1092{
1093 u32 pmen;
1094 unsigned long flags;
1095
1096 spin_lock_irqsave(&iommu->register_lock, flags);
1097 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1098 pmen &= ~DMA_PMEN_EPM;
1099 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1100
1101 /* wait for the protected region status bit to clear */
1102 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1103 readl, !(pmen & DMA_PMEN_PRS), pmen);
1104
1105 spin_unlock_irqrestore(&iommu->register_lock, flags);
1106}
1107
ba395927
KA
1108static int iommu_enable_translation(struct intel_iommu *iommu)
1109{
1110 u32 sts;
1111 unsigned long flags;
1112
1113 spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1114 iommu->gcmd |= DMA_GCMD_TE;
1115 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1116
1117 /* Make sure hardware complete it */
1118 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1119 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1120
ba395927
KA
1121 spin_unlock_irqrestore(&iommu->register_lock, flags);
1122 return 0;
1123}
1124
1125static int iommu_disable_translation(struct intel_iommu *iommu)
1126{
1127 u32 sts;
1128 unsigned long flag;
1129
1130 spin_lock_irqsave(&iommu->register_lock, flag);
1131 iommu->gcmd &= ~DMA_GCMD_TE;
1132 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1133
1134 /* Make sure hardware complete it */
1135 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1136 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927
KA
1137
1138 spin_unlock_irqrestore(&iommu->register_lock, flag);
1139 return 0;
1140}
1141
3460a6d9 1142
ba395927
KA
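/*
 * Allocate the per-IOMMU domain id bitmap and domain pointer array.
 * Domain id 0 is pre-allocated when caching mode is set, since invalid
 * translations are tagged with it.
 */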
1143static int iommu_init_domains(struct intel_iommu *iommu)
1144{
1145 unsigned long ndomains;
1146 unsigned long nlongs;
1147
1148 ndomains = cap_ndoms(iommu->cap);
680a7524
YL
1149 pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1150 ndomains);
ba395927
KA
1151 nlongs = BITS_TO_LONGS(ndomains);
1152
94a91b50
DD
1153 spin_lock_init(&iommu->lock);
1154
ba395927
KA
1155 /* TBD: there might be 64K domains,
1156 * consider other allocation for future chip
1157 */
1158 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1159 if (!iommu->domain_ids) {
1160 printk(KERN_ERR "Allocating domain id array failed\n");
1161 return -ENOMEM;
1162 }
1163 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1164 GFP_KERNEL);
1165 if (!iommu->domains) {
1166 printk(KERN_ERR "Allocating domain array failed\n");
ba395927
KA
1167 return -ENOMEM;
1168 }
1169
1170 /*
1171 * if Caching mode is set, then invalid translations are tagged
1172 * with domain id 0. Hence we need to pre-allocate it.
1173 */
1174 if (cap_caching_mode(iommu->cap))
1175 set_bit(0, iommu->domain_ids);
1176 return 0;
1177}
ba395927 1178
ba395927
KA
1179
1180static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1181static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1182
1183void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1184{
1185 struct dmar_domain *domain;
1186 int i;
c7151a8d 1187 unsigned long flags;
ba395927 1188
94a91b50 1189 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1190 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
94a91b50
DD
1191 domain = iommu->domains[i];
1192 clear_bit(i, iommu->domain_ids);
1193
1194 spin_lock_irqsave(&domain->iommu_lock, flags);
1195 if (--domain->iommu_count == 0) {
1196 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1197 vm_domain_exit(domain);
1198 else
1199 domain_exit(domain);
1200 }
1201 spin_unlock_irqrestore(&domain->iommu_lock, flags);
5e98c4b1 1202 }
ba395927
KA
1203 }
1204
1205 if (iommu->gcmd & DMA_GCMD_TE)
1206 iommu_disable_translation(iommu);
1207
1208 if (iommu->irq) {
dced35ae 1209 irq_set_handler_data(iommu->irq, NULL);
ba395927
KA
1210 /* This will mask the irq */
1211 free_irq(iommu->irq, iommu);
1212 destroy_irq(iommu->irq);
1213 }
1214
1215 kfree(iommu->domains);
1216 kfree(iommu->domain_ids);
1217
d9630fe9
WH
1218 g_iommus[iommu->seq_id] = NULL;
1219
1220 /* if all iommus are freed, free g_iommus */
1221 for (i = 0; i < g_num_of_iommus; i++) {
1222 if (g_iommus[i])
1223 break;
1224 }
1225
1226 if (i == g_num_of_iommus)
1227 kfree(g_iommus);
1228
ba395927
KA
1229 /* free context mapping */
1230 free_context_table(iommu);
ba395927
KA
1231}
1232
2c2e2c38 1233static struct dmar_domain *alloc_domain(void)
ba395927 1234{
ba395927 1235 struct dmar_domain *domain;
ba395927
KA
1236
1237 domain = alloc_domain_mem();
1238 if (!domain)
1239 return NULL;
1240
4c923d47 1241 domain->nid = -1;
2c2e2c38
FY
1242 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1243 domain->flags = 0;
1244
1245 return domain;
1246}
1247
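/* Bind a domain to an IOMMU: claim a free domain id and record the association on both sides */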
1248static int iommu_attach_domain(struct dmar_domain *domain,
1249 struct intel_iommu *iommu)
1250{
1251 int num;
1252 unsigned long ndomains;
1253 unsigned long flags;
1254
ba395927
KA
1255 ndomains = cap_ndoms(iommu->cap);
1256
1257 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1258
ba395927
KA
1259 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1260 if (num >= ndomains) {
1261 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1262 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1263 return -ENOMEM;
ba395927
KA
1264 }
1265
ba395927 1266 domain->id = num;
2c2e2c38 1267 set_bit(num, iommu->domain_ids);
8c11e798 1268 set_bit(iommu->seq_id, &domain->iommu_bmp);
ba395927
KA
1269 iommu->domains[num] = domain;
1270 spin_unlock_irqrestore(&iommu->lock, flags);
1271
2c2e2c38 1272 return 0;
ba395927
KA
1273}
1274
2c2e2c38
FY
1275static void iommu_detach_domain(struct dmar_domain *domain,
1276 struct intel_iommu *iommu)
ba395927
KA
1277{
1278 unsigned long flags;
2c2e2c38
FY
1279 int num, ndomains;
1280 int found = 0;
ba395927 1281
8c11e798 1282 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1283 ndomains = cap_ndoms(iommu->cap);
a45946ab 1284 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38
FY
1285 if (iommu->domains[num] == domain) {
1286 found = 1;
1287 break;
1288 }
2c2e2c38
FY
1289 }
1290
1291 if (found) {
1292 clear_bit(num, iommu->domain_ids);
1293 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1294 iommu->domains[num] = NULL;
1295 }
8c11e798 1296 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1297}
1298
1299static struct iova_domain reserved_iova_list;
8a443df4 1300static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1301
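/*
 * Reserve IOVA ranges that must never be handed out for DMA: the IOAPIC
 * window and all PCI MMIO resources (to avoid peer-to-peer accesses).
 */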
1302static void dmar_init_reserved_ranges(void)
1303{
1304 struct pci_dev *pdev = NULL;
1305 struct iova *iova;
1306 int i;
ba395927 1307
f661197e 1308 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1309
8a443df4
MG
1310 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1311 &reserved_rbtree_key);
1312
ba395927
KA
1313 /* IOAPIC ranges shouldn't be accessed by DMA */
1314 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1315 IOVA_PFN(IOAPIC_RANGE_END));
1316 if (!iova)
1317 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1318
1319 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1320 for_each_pci_dev(pdev) {
1321 struct resource *r;
1322
1323 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1324 r = &pdev->resource[i];
1325 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1326 continue;
1a4a4551
DW
1327 iova = reserve_iova(&reserved_iova_list,
1328 IOVA_PFN(r->start),
1329 IOVA_PFN(r->end));
ba395927
KA
1330 if (!iova)
1331 printk(KERN_ERR "Reserve iova failed\n");
1332 }
1333 }
1334
1335}
1336
1337static void domain_reserve_special_ranges(struct dmar_domain *domain)
1338{
1339 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1340}
1341
1342static inline int guestwidth_to_adjustwidth(int gaw)
1343{
1344 int agaw;
1345 int r = (gaw - 12) % 9;
1346
1347 if (r == 0)
1348 agaw = gaw;
1349 else
1350 agaw = gaw + 9 - r;
1351 if (agaw > 64)
1352 agaw = 64;
1353 return agaw;
1354}
1355
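/*
 * Initialize a newly attached domain: iova space, guest/adjusted address
 * width, coherency and snooping flags, and the top-level page directory.
 */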
1356static int domain_init(struct dmar_domain *domain, int guest_width)
1357{
1358 struct intel_iommu *iommu;
1359 int adjust_width, agaw;
1360 unsigned long sagaw;
1361
f661197e 1362 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1363 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1364
1365 domain_reserve_special_ranges(domain);
1366
1367 /* calculate AGAW */
8c11e798 1368 iommu = domain_get_iommu(domain);
ba395927
KA
1369 if (guest_width > cap_mgaw(iommu->cap))
1370 guest_width = cap_mgaw(iommu->cap);
1371 domain->gaw = guest_width;
1372 adjust_width = guestwidth_to_adjustwidth(guest_width);
1373 agaw = width_to_agaw(adjust_width);
1374 sagaw = cap_sagaw(iommu->cap);
1375 if (!test_bit(agaw, &sagaw)) {
1376 /* hardware doesn't support it, choose a bigger one */
1377 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1378 agaw = find_next_bit(&sagaw, 5, agaw);
1379 if (agaw >= 5)
1380 return -ENODEV;
1381 }
1382 domain->agaw = agaw;
1383 INIT_LIST_HEAD(&domain->devices);
1384
8e604097
WH
1385 if (ecap_coherent(iommu->ecap))
1386 domain->iommu_coherency = 1;
1387 else
1388 domain->iommu_coherency = 0;
1389
58c610bd
SY
1390 if (ecap_sc_support(iommu->ecap))
1391 domain->iommu_snooping = 1;
1392 else
1393 domain->iommu_snooping = 0;
1394
c7151a8d 1395 domain->iommu_count = 1;
4c923d47 1396 domain->nid = iommu->node;
c7151a8d 1397
ba395927 1398 /* always allocate the top pgd */
4c923d47 1399 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1400 if (!domain->pgd)
1401 return -ENOMEM;
5b6985ce 1402 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1403 return 0;
1404}
1405
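/*
 * Tear down a domain: remove its devices, release its iovas, clear and
 * free its page tables, and detach it from every IOMMU it was attached to.
 */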
1406static void domain_exit(struct dmar_domain *domain)
1407{
2c2e2c38
FY
1408 struct dmar_drhd_unit *drhd;
1409 struct intel_iommu *iommu;
ba395927
KA
1410
1411 /* Domain 0 is reserved, so don't process it */
1412 if (!domain)
1413 return;
1414
1415 domain_remove_dev_info(domain);
1416 /* destroy iovas */
1417 put_iova_domain(&domain->iovad);
ba395927
KA
1418
1419 /* clear ptes */
595badf5 1420 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1421
1422 /* free page tables */
d794dc9b 1423 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1424
2c2e2c38
FY
1425 for_each_active_iommu(iommu, drhd)
1426 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1427 iommu_detach_domain(domain, iommu);
1428
ba395927
KA
1429 free_domain_mem(domain);
1430}
1431
4ed0d3e6
FY
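/*
 * Program the context entry for (segment, bus, devfn) with this domain's
 * id, page-table root and translation type, then flush the context and
 * IOTLB caches as caching mode requires.
 */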
1432static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1433 u8 bus, u8 devfn, int translation)
ba395927
KA
1434{
1435 struct context_entry *context;
ba395927 1436 unsigned long flags;
5331fe6f 1437 struct intel_iommu *iommu;
ea6606b0
WH
1438 struct dma_pte *pgd;
1439 unsigned long num;
1440 unsigned long ndomains;
1441 int id;
1442 int agaw;
93a23a72 1443 struct device_domain_info *info = NULL;
ba395927
KA
1444
1445 pr_debug("Set context mapping for %02x:%02x.%d\n",
1446 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1447
ba395927 1448 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1449 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1450 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1451
276dbf99 1452 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1453 if (!iommu)
1454 return -ENODEV;
1455
ba395927
KA
1456 context = device_to_context_entry(iommu, bus, devfn);
1457 if (!context)
1458 return -ENOMEM;
1459 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1460 if (context_present(context)) {
ba395927
KA
1461 spin_unlock_irqrestore(&iommu->lock, flags);
1462 return 0;
1463 }
1464
ea6606b0
WH
1465 id = domain->id;
1466 pgd = domain->pgd;
1467
2c2e2c38
FY
1468 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1469 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1470 int found = 0;
1471
1472 /* find an available domain id for this device in iommu */
1473 ndomains = cap_ndoms(iommu->cap);
a45946ab 1474 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1475 if (iommu->domains[num] == domain) {
1476 id = num;
1477 found = 1;
1478 break;
1479 }
ea6606b0
WH
1480 }
1481
1482 if (found == 0) {
1483 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1484 if (num >= ndomains) {
1485 spin_unlock_irqrestore(&iommu->lock, flags);
1486 printk(KERN_ERR "IOMMU: no free domain ids\n");
1487 return -EFAULT;
1488 }
1489
1490 set_bit(num, iommu->domain_ids);
1491 iommu->domains[num] = domain;
1492 id = num;
1493 }
1494
1495 /* Skip top levels of page tables for
1496 * an iommu which has a smaller agaw than the default.
1672af11 1497 * Unnecessary for PT mode.
ea6606b0 1498 */
1672af11
CW
1499 if (translation != CONTEXT_TT_PASS_THROUGH) {
1500 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1501 pgd = phys_to_virt(dma_pte_addr(pgd));
1502 if (!dma_pte_present(pgd)) {
1503 spin_unlock_irqrestore(&iommu->lock, flags);
1504 return -ENOMEM;
1505 }
ea6606b0
WH
1506 }
1507 }
1508 }
1509
1510 context_set_domain_id(context, id);
4ed0d3e6 1511
93a23a72
YZ
1512 if (translation != CONTEXT_TT_PASS_THROUGH) {
1513 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1514 translation = info ? CONTEXT_TT_DEV_IOTLB :
1515 CONTEXT_TT_MULTI_LEVEL;
1516 }
4ed0d3e6
FY
1517 /*
1518 * In pass through mode, AW must be programmed to indicate the largest
1519 * AGAW value supported by hardware. And ASR is ignored by hardware.
1520 */
93a23a72 1521 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1522 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1523 else {
1524 context_set_address_root(context, virt_to_phys(pgd));
1525 context_set_address_width(context, iommu->agaw);
1526 }
4ed0d3e6
FY
1527
1528 context_set_translation_type(context, translation);
c07e7d21
MM
1529 context_set_fault_enable(context);
1530 context_set_present(context);
5331fe6f 1531 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1532
4c25a2c1
DW
1533 /*
1534 * It's a non-present to present mapping. If hardware doesn't cache
1535 * non-present entries we only need to flush the write-buffer. If it
1536 * _does_ cache non-present entries, then it does so in the special
1537 * domain #0, which we have to flush:
1538 */
1539 if (cap_caching_mode(iommu->cap)) {
1540 iommu->flush.flush_context(iommu, 0,
1541 (((u16)bus) << 8) | devfn,
1542 DMA_CCMD_MASK_NOBIT,
1543 DMA_CCMD_DEVICE_INVL);
82653633 1544 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1545 } else {
ba395927 1546 iommu_flush_write_buffer(iommu);
4c25a2c1 1547 }
93a23a72 1548 iommu_enable_dev_iotlb(info);
ba395927 1549 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1550
1551 spin_lock_irqsave(&domain->iommu_lock, flags);
1552 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1553 domain->iommu_count++;
4c923d47
SS
1554 if (domain->iommu_count == 1)
1555 domain->nid = iommu->node;
58c610bd 1556 domain_update_iommu_cap(domain);
c7151a8d
WH
1557 }
1558 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1559 return 0;
1560}
1561
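/*
 * Set up context entries for the device itself and for every bridge on
 * the path up to its upstream PCIe-to-PCI (or legacy PCI) bridge.
 */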
1562static int
4ed0d3e6
FY
1563domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1564 int translation)
ba395927
KA
1565{
1566 int ret;
1567 struct pci_dev *tmp, *parent;
1568
276dbf99 1569 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1570 pdev->bus->number, pdev->devfn,
1571 translation);
ba395927
KA
1572 if (ret)
1573 return ret;
1574
1575 /* dependent device mapping */
1576 tmp = pci_find_upstream_pcie_bridge(pdev);
1577 if (!tmp)
1578 return 0;
1579 /* Secondary interface's bus number and devfn 0 */
1580 parent = pdev->bus->self;
1581 while (parent != tmp) {
276dbf99
DW
1582 ret = domain_context_mapping_one(domain,
1583 pci_domain_nr(parent->bus),
1584 parent->bus->number,
4ed0d3e6 1585 parent->devfn, translation);
ba395927
KA
1586 if (ret)
1587 return ret;
1588 parent = parent->bus->self;
1589 }
45e829ea 1590 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1591 return domain_context_mapping_one(domain,
276dbf99 1592 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1593 tmp->subordinate->number, 0,
1594 translation);
ba395927
KA
1595 else /* this is a legacy PCI bridge */
1596 return domain_context_mapping_one(domain,
276dbf99
DW
1597 pci_domain_nr(tmp->bus),
1598 tmp->bus->number,
4ed0d3e6
FY
1599 tmp->devfn,
1600 translation);
ba395927
KA
1601}
1602
5331fe6f 1603static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1604{
1605 int ret;
1606 struct pci_dev *tmp, *parent;
5331fe6f
WH
1607 struct intel_iommu *iommu;
1608
276dbf99
DW
1609 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1610 pdev->devfn);
5331fe6f
WH
1611 if (!iommu)
1612 return -ENODEV;
ba395927 1613
276dbf99 1614 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1615 if (!ret)
1616 return ret;
1617 /* dependent device mapping */
1618 tmp = pci_find_upstream_pcie_bridge(pdev);
1619 if (!tmp)
1620 return ret;
1621 /* Secondary interface's bus number and devfn 0 */
1622 parent = pdev->bus->self;
1623 while (parent != tmp) {
8c11e798 1624 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1625 parent->devfn);
ba395927
KA
1626 if (!ret)
1627 return ret;
1628 parent = parent->bus->self;
1629 }
5f4d91a1 1630 if (pci_is_pcie(tmp))
276dbf99
DW
1631 return device_context_mapped(iommu, tmp->subordinate->number,
1632 0);
ba395927 1633 else
276dbf99
DW
1634 return device_context_mapped(iommu, tmp->bus->number,
1635 tmp->devfn);
ba395927
KA
1636}
1637
f532959b
FY
1638/* Returns a number of VTD pages, but aligned to MM page size */
1639static inline unsigned long aligned_nrpages(unsigned long host_addr,
1640 size_t size)
1641{
1642 host_addr &= ~PAGE_MASK;
1643 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1644}
1645
9051aa02
DW
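/*
 * Core mapping routine: write leaf PTEs for @nr_pages starting at
 * @iov_pfn, taking the physical pages either from @phys_pfn or from the
 * scatterlist, and flush each completed PTE page.
 */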
1646static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1647 struct scatterlist *sg, unsigned long phys_pfn,
1648 unsigned long nr_pages, int prot)
e1605495
DW
1649{
1650 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1651 phys_addr_t uninitialized_var(pteval);
e1605495 1652 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1653 unsigned long sg_res;
e1605495
DW
1654
1655 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1656
1657 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1658 return -EINVAL;
1659
1660 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1661
9051aa02
DW
1662 if (sg)
1663 sg_res = 0;
1664 else {
1665 sg_res = nr_pages + 1;
1666 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1667 }
1668
e1605495 1669 while (nr_pages--) {
c85994e4
DW
1670 uint64_t tmp;
1671
e1605495 1672 if (!sg_res) {
f532959b 1673 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1674 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1675 sg->dma_length = sg->length;
1676 pteval = page_to_phys(sg_page(sg)) | prot;
1677 }
1678 if (!pte) {
1679 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1680 if (!pte)
1681 return -ENOMEM;
1682 }
1683 /* We don't need lock here, nobody else
1684 * touches the iova range
1685 */
7766a3fb 1686 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1687 if (tmp) {
1bf20f0d 1688 static int dumps = 5;
c85994e4
DW
1689 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1690 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1691 if (dumps) {
1692 dumps--;
1693 debug_dma_dump_mappings(NULL);
1694 }
1695 WARN_ON(1);
1696 }
e1605495 1697 pte++;
75e6bf96 1698 if (!nr_pages || first_pte_in_page(pte)) {
e1605495
DW
1699 domain_flush_cache(domain, first_pte,
1700 (void *)pte - (void *)first_pte);
1701 pte = NULL;
1702 }
1703 iov_pfn++;
1704 pteval += VTD_PAGE_SIZE;
1705 sg_res--;
1706 if (!sg_res)
1707 sg = sg_next(sg);
1708 }
1709 return 0;
1710}
1711
9051aa02
DW
1712static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1713 struct scatterlist *sg, unsigned long nr_pages,
1714 int prot)
ba395927 1715{
9051aa02
DW
1716 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1717}
6f6a00e4 1718
9051aa02
DW
1719static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1720 unsigned long phys_pfn, unsigned long nr_pages,
1721 int prot)
1722{
1723 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1724}
1725
c7151a8d 1726static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1727{
c7151a8d
WH
1728 if (!iommu)
1729 return;
8c11e798
WH
1730
1731 clear_context_table(iommu, bus, devfn);
1732 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1733 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1734 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1735}
1736
1737static void domain_remove_dev_info(struct dmar_domain *domain)
1738{
1739 struct device_domain_info *info;
1740 unsigned long flags;
c7151a8d 1741 struct intel_iommu *iommu;
ba395927
KA
1742
1743 spin_lock_irqsave(&device_domain_lock, flags);
1744 while (!list_empty(&domain->devices)) {
1745 info = list_entry(domain->devices.next,
1746 struct device_domain_info, link);
1747 list_del(&info->link);
1748 list_del(&info->global);
1749 if (info->dev)
358dd8ac 1750 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1751 spin_unlock_irqrestore(&device_domain_lock, flags);
1752
93a23a72 1753 iommu_disable_dev_iotlb(info);
276dbf99 1754 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1755 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1756 free_devinfo_mem(info);
1757
1758 spin_lock_irqsave(&device_domain_lock, flags);
1759 }
1760 spin_unlock_irqrestore(&device_domain_lock, flags);
1761}
1762
1763/*
1764 * find_domain
358dd8ac 1765 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1766 */
38717946 1767static struct dmar_domain *
ba395927
KA
1768find_domain(struct pci_dev *pdev)
1769{
1770 struct device_domain_info *info;
1771
1772 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1773 info = pdev->dev.archdata.iommu;
ba395927
KA
1774 if (info)
1775 return info->domain;
1776 return NULL;
1777}
1778
ba395927
KA
1779/* domain is initialized */
1780static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1781{
1782 struct dmar_domain *domain, *found = NULL;
1783 struct intel_iommu *iommu;
1784 struct dmar_drhd_unit *drhd;
1785 struct device_domain_info *info, *tmp;
1786 struct pci_dev *dev_tmp;
1787 unsigned long flags;
1788 int bus = 0, devfn = 0;
276dbf99 1789 int segment;
2c2e2c38 1790 int ret;
ba395927
KA
1791
1792 domain = find_domain(pdev);
1793 if (domain)
1794 return domain;
1795
276dbf99
DW
1796 segment = pci_domain_nr(pdev->bus);
1797
ba395927
KA
1798 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1799 if (dev_tmp) {
5f4d91a1 1800 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
1801 bus = dev_tmp->subordinate->number;
1802 devfn = 0;
1803 } else {
1804 bus = dev_tmp->bus->number;
1805 devfn = dev_tmp->devfn;
1806 }
1807 spin_lock_irqsave(&device_domain_lock, flags);
1808 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1809 if (info->segment == segment &&
1810 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1811 found = info->domain;
1812 break;
1813 }
1814 }
1815 spin_unlock_irqrestore(&device_domain_lock, flags);
1816 /* pcie-pci bridge already has a domain, use it */
1817 if (found) {
1818 domain = found;
1819 goto found_domain;
1820 }
1821 }
1822
2c2e2c38
FY
1823 domain = alloc_domain();
1824 if (!domain)
1825 goto error;
1826
ba395927
KA
1827 /* Allocate new domain for the device */
1828 drhd = dmar_find_matched_drhd_unit(pdev);
1829 if (!drhd) {
1830 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1831 pci_name(pdev));
1832 return NULL;
1833 }
1834 iommu = drhd->iommu;
1835
2c2e2c38
FY
1836 ret = iommu_attach_domain(domain, iommu);
1837 if (ret) {
1838 domain_exit(domain);
ba395927 1839 goto error;
2c2e2c38 1840 }
ba395927
KA
1841
1842 if (domain_init(domain, gaw)) {
1843 domain_exit(domain);
1844 goto error;
1845 }
1846
1847 /* register pcie-to-pci device */
1848 if (dev_tmp) {
1849 info = alloc_devinfo_mem();
1850 if (!info) {
1851 domain_exit(domain);
1852 goto error;
1853 }
276dbf99 1854 info->segment = segment;
ba395927
KA
1855 info->bus = bus;
1856 info->devfn = devfn;
1857 info->dev = NULL;
1858 info->domain = domain;
1859 /* This domain is shared by devices under p2p bridge */
3b5410e7 1860 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1861
1862 /* pcie-to-pci bridge already has a domain, use it */
1863 found = NULL;
1864 spin_lock_irqsave(&device_domain_lock, flags);
1865 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
1866 if (tmp->segment == segment &&
1867 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
1868 found = tmp->domain;
1869 break;
1870 }
1871 }
1872 if (found) {
00dfff77 1873 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1874 free_devinfo_mem(info);
1875 domain_exit(domain);
1876 domain = found;
1877 } else {
1878 list_add(&info->link, &domain->devices);
1879 list_add(&info->global, &device_domain_list);
00dfff77 1880 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 1881 }
ba395927
KA
1882 }
1883
1884found_domain:
1885 info = alloc_devinfo_mem();
1886 if (!info)
1887 goto error;
276dbf99 1888 info->segment = segment;
ba395927
KA
1889 info->bus = pdev->bus->number;
1890 info->devfn = pdev->devfn;
1891 info->dev = pdev;
1892 info->domain = domain;
1893 spin_lock_irqsave(&device_domain_lock, flags);
1894 /* somebody else raced us and already set up this device's domain */
1895 found = find_domain(pdev);
1896 if (found != NULL) {
1897 spin_unlock_irqrestore(&device_domain_lock, flags);
1898 if (found != domain) {
1899 domain_exit(domain);
1900 domain = found;
1901 }
1902 free_devinfo_mem(info);
1903 return domain;
1904 }
1905 list_add(&info->link, &domain->devices);
1906 list_add(&info->global, &device_domain_list);
358dd8ac 1907 pdev->dev.archdata.iommu = info;
ba395927
KA
1908 spin_unlock_irqrestore(&device_domain_lock, flags);
1909 return domain;
1910error:
1911 /* recheck it here, maybe others set it */
1912 return find_domain(pdev);
1913}
1914
2c2e2c38 1915static int iommu_identity_mapping;
e0fc7e0b
DW
1916#define IDENTMAP_ALL 1
1917#define IDENTMAP_GFX 2
1918#define IDENTMAP_AZALIA 4
2c2e2c38 1919
b213203e
DW
1920static int iommu_domain_identity_map(struct dmar_domain *domain,
1921 unsigned long long start,
1922 unsigned long long end)
ba395927 1923{
c5395d5c
DW
1924 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1925 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1926
1927 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1928 dma_to_mm_pfn(last_vpfn))) {
ba395927 1929 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 1930 return -ENOMEM;
ba395927
KA
1931 }
1932
c5395d5c
DW
1933 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1934 start, end, domain->id);
ba395927
KA
1935 /*
1936 * RMRR range might have overlap with physical memory range,
1937 * clear it first
1938 */
c5395d5c 1939 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 1940
c5395d5c
DW
1941 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1942 last_vpfn - first_vpfn + 1,
61df7443 1943 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
1944}
1945
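/*
 * Illustrative sketch, not part of the original file: identity-mapping a
 * hypothetical reserved region with the helper above.  The base address is
 * made up for the example; real callers pass RMRR ranges from the DMAR
 * table (see iommu_prepare_rmrr_dev() below).
 */
static int __maybe_unused example_identity_map(struct dmar_domain *domain)
{
	unsigned long long start = 0xd0000000ULL;		/* hypothetical base */
	unsigned long long end   = start + 0x100000 - 1;	/* 1MiB, inclusive end */

	/* reserves the IOVA range and installs 1:1 read/write mappings */
	return iommu_domain_identity_map(domain, start, end);
}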
1946static int iommu_prepare_identity_map(struct pci_dev *pdev,
1947 unsigned long long start,
1948 unsigned long long end)
1949{
1950 struct dmar_domain *domain;
1951 int ret;
1952
c7ab48d2 1953 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
1954 if (!domain)
1955 return -ENOMEM;
1956
19943b0e
DW
1957 /* For _hardware_ passthrough, don't bother. But for software
1958 passthrough, we do it anyway -- it may indicate a memory
1959 range which is reserved in E820 and so didn't get set
1960 up to start with in si_domain */
1961 if (domain == si_domain && hw_pass_through) {
1962 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
1963 pci_name(pdev), start, end);
1964 return 0;
1965 }
1966
1967 printk(KERN_INFO
1968 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1969 pci_name(pdev), start, end);
2ff729f5 1970
5595b528
DW
1971 if (end < start) {
1972 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
1973 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1974 dmi_get_system_info(DMI_BIOS_VENDOR),
1975 dmi_get_system_info(DMI_BIOS_VERSION),
1976 dmi_get_system_info(DMI_PRODUCT_VERSION));
1977 ret = -EIO;
1978 goto error;
1979 }
1980
2ff729f5
DW
1981 if (end >> agaw_to_width(domain->agaw)) {
1982 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
1983 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1984 agaw_to_width(domain->agaw),
1985 dmi_get_system_info(DMI_BIOS_VENDOR),
1986 dmi_get_system_info(DMI_BIOS_VERSION),
1987 dmi_get_system_info(DMI_PRODUCT_VERSION));
1988 ret = -EIO;
1989 goto error;
1990 }
19943b0e 1991
b213203e 1992 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
1993 if (ret)
1994 goto error;
1995
1996 /* context entry init */
4ed0d3e6 1997 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
1998 if (ret)
1999 goto error;
2000
2001 return 0;
2002
2003 error:
ba395927
KA
2004 domain_exit(domain);
2005 return ret;
ba395927
KA
2006}
2007
2008static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2009 struct pci_dev *pdev)
2010{
358dd8ac 2011 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2012 return 0;
2013 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2014 rmrr->end_address + 1);
2015}
2016
49a0429e
KA
2017#ifdef CONFIG_DMAR_FLOPPY_WA
2018static inline void iommu_prepare_isa(void)
2019{
2020 struct pci_dev *pdev;
2021 int ret;
2022
2023 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2024 if (!pdev)
2025 return;
2026
c7ab48d2 2027 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
49a0429e
KA
2028 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2029
2030 if (ret)
c7ab48d2
DW
2031 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2032 "floppy might not work\n");
49a0429e
KA
2033
2034}
2035#else
2036static inline void iommu_prepare_isa(void)
2037{
2038 return;
2039}
2040#endif /* !CONFIG_DMAR_FLOPPY_WA */
2041
2c2e2c38 2042static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2
DW
2043
2044static int __init si_domain_work_fn(unsigned long start_pfn,
2045 unsigned long end_pfn, void *datax)
2046{
2047 int *ret = datax;
2048
2049 *ret = iommu_domain_identity_map(si_domain,
2050 (uint64_t)start_pfn << PAGE_SHIFT,
2051 (uint64_t)end_pfn << PAGE_SHIFT);
2052 return *ret;
2053
2054}
2055
071e1374 2056static int __init si_domain_init(int hw)
2c2e2c38
FY
2057{
2058 struct dmar_drhd_unit *drhd;
2059 struct intel_iommu *iommu;
c7ab48d2 2060 int nid, ret = 0;
2c2e2c38
FY
2061
2062 si_domain = alloc_domain();
2063 if (!si_domain)
2064 return -EFAULT;
2065
c7ab48d2 2066 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2067
2068 for_each_active_iommu(iommu, drhd) {
2069 ret = iommu_attach_domain(si_domain, iommu);
2070 if (ret) {
2071 domain_exit(si_domain);
2072 return -EFAULT;
2073 }
2074 }
2075
2076 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2077 domain_exit(si_domain);
2078 return -EFAULT;
2079 }
2080
2081 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2082
19943b0e
DW
2083 if (hw)
2084 return 0;
2085
c7ab48d2
DW
2086 for_each_online_node(nid) {
2087 work_with_active_regions(nid, si_domain_work_fn, &ret);
2088 if (ret)
2089 return ret;
2090 }
2091
2c2e2c38
FY
2092 return 0;
2093}
2094
2095static void domain_remove_one_dev_info(struct dmar_domain *domain,
2096 struct pci_dev *pdev);
2097static int identity_mapping(struct pci_dev *pdev)
2098{
2099 struct device_domain_info *info;
2100
2101 if (likely(!iommu_identity_mapping))
2102 return 0;
2103
2104
2105 list_for_each_entry(info, &si_domain->devices, link)
2106 if (info->dev == pdev)
2107 return 1;
2108 return 0;
2109}
2110
2111static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2112 struct pci_dev *pdev,
2113 int translation)
2c2e2c38
FY
2114{
2115 struct device_domain_info *info;
2116 unsigned long flags;
5fe60f4e 2117 int ret;
2c2e2c38
FY
2118
2119 info = alloc_devinfo_mem();
2120 if (!info)
2121 return -ENOMEM;
2122
5fe60f4e
DW
2123 ret = domain_context_mapping(domain, pdev, translation);
2124 if (ret) {
2125 free_devinfo_mem(info);
2126 return ret;
2127 }
2128
2c2e2c38
FY
2129 info->segment = pci_domain_nr(pdev->bus);
2130 info->bus = pdev->bus->number;
2131 info->devfn = pdev->devfn;
2132 info->dev = pdev;
2133 info->domain = domain;
2134
2135 spin_lock_irqsave(&device_domain_lock, flags);
2136 list_add(&info->link, &domain->devices);
2137 list_add(&info->global, &device_domain_list);
2138 pdev->dev.archdata.iommu = info;
2139 spin_unlock_irqrestore(&device_domain_lock, flags);
2140
2141 return 0;
2142}
2143
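/*
 * Illustrative sketch, not part of the original file: attaching a device to
 * the static identity domain with the helper above.  hw_pass_through and
 * si_domain are the file-level state set up during init_dmars(); the pdev
 * is a hypothetical device supplied by the caller.
 */
static int __maybe_unused example_add_to_si_domain(struct pci_dev *pdev)
{
	return domain_add_dev_info(si_domain, pdev,
				   hw_pass_through ? CONTEXT_TT_PASS_THROUGH :
						     CONTEXT_TT_MULTI_LEVEL);
}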
6941af28
DW
2144static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2145{
e0fc7e0b
DW
2146 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2147 return 1;
2148
2149 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2150 return 1;
2151
2152 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2153 return 0;
6941af28 2154
3dfc813d
DW
2155 /*
2156 * We want to start off with all devices in the 1:1 domain, and
2157 * take them out later if we find they can't access all of memory.
2158 *
2159 * However, we can't do this for PCI devices behind bridges,
2160 * because all PCI devices behind the same bridge will end up
2161 * with the same source-id on their transactions.
2162 *
2163 * Practically speaking, we can't change things around for these
2164 * devices at run-time, because we can't be sure there'll be no
2165 * DMA transactions in flight for any of their siblings.
2166 *
2167 * So PCI devices (unless they're on the root bus) as well as
2168 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2169 * the 1:1 domain, just in _case_ one of their siblings turns out
2170 * not to be able to map all of memory.
2171 */
5f4d91a1 2172 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2173 if (!pci_is_root_bus(pdev->bus))
2174 return 0;
2175 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2176 return 0;
2177 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2178 return 0;
2179
2180 /*
2181 * At boot time, we don't yet know if devices will be 64-bit capable.
2182 * Assume that they will -- if they turn out not to be, then we can
2183 * take them out of the 1:1 domain later.
2184 */
6941af28
DW
2185 if (!startup)
2186 return pdev->dma_mask > DMA_BIT_MASK(32);
2187
2188 return 1;
2189}
2190
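/*
 * Illustrative sketch, not part of the original file: which bus/devfn acts
 * as the DMA requester for a given device.  It mirrors the lookup done in
 * get_domain_for_dev() above and illustrates why devices behind a
 * PCIe-to-PCI bridge cannot be identity-mapped individually -- they all
 * share the bridge's source-id.  The helper name is made up.
 */
static void __maybe_unused example_source_id(struct pci_dev *pdev,
					     u8 *bus, u8 *devfn)
{
	struct pci_dev *bridge = pci_find_upstream_pcie_bridge(pdev);

	if (!bridge) {
		/* PCIe device (or root bus): it owns its requester ID */
		*bus = pdev->bus->number;
		*devfn = pdev->devfn;
	} else if (pci_is_pcie(bridge)) {
		/* PCIe-to-PCI bridge: transactions carry (secondary bus, 0) */
		*bus = bridge->subordinate->number;
		*devfn = 0;
	} else {
		/* legacy PCI bridge: transactions carry the bridge's own ID */
		*bus = bridge->bus->number;
		*devfn = bridge->devfn;
	}
}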
071e1374 2191static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2192{
2c2e2c38
FY
2193 struct pci_dev *pdev = NULL;
2194 int ret;
2195
19943b0e 2196 ret = si_domain_init(hw);
2c2e2c38
FY
2197 if (ret)
2198 return -EFAULT;
2199
2c2e2c38 2200 for_each_pci_dev(pdev) {
6941af28 2201 if (iommu_should_identity_map(pdev, 1)) {
19943b0e
DW
2202 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2203 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2204
5fe60f4e 2205 ret = domain_add_dev_info(si_domain, pdev,
19943b0e 2206 hw ? CONTEXT_TT_PASS_THROUGH :
62edf5dc
DW
2207 CONTEXT_TT_MULTI_LEVEL);
2208 if (ret)
2209 return ret;
62edf5dc 2210 }
2c2e2c38
FY
2211 }
2212
2213 return 0;
2214}
2215
2216int __init init_dmars(void)
ba395927
KA
2217{
2218 struct dmar_drhd_unit *drhd;
2219 struct dmar_rmrr_unit *rmrr;
2220 struct pci_dev *pdev;
2221 struct intel_iommu *iommu;
9d783ba0 2222 int i, ret;
2c2e2c38 2223
ba395927
KA
2224 /*
2225 * for each drhd
2226 * allocate root
2227 * initialize and program root entry to not present
2228 * endfor
2229 */
2230 for_each_drhd_unit(drhd) {
5e0d2a6f 2231 g_num_of_iommus++;
2232 /*
2233 * lock not needed as this is only incremented in the single-
2234 * threaded kernel __init code path; all other accesses are
2235 * read only
2236 */
2237 }
2238
d9630fe9
WH
2239 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2240 GFP_KERNEL);
2241 if (!g_iommus) {
2242 printk(KERN_ERR "Allocating global iommu array failed\n");
2243 ret = -ENOMEM;
2244 goto error;
2245 }
2246
80b20dd8 2247 deferred_flush = kzalloc(g_num_of_iommus *
2248 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2249 if (!deferred_flush) {
5e0d2a6f 2250 ret = -ENOMEM;
2251 goto error;
2252 }
2253
5e0d2a6f 2254 for_each_drhd_unit(drhd) {
2255 if (drhd->ignored)
2256 continue;
1886e8a9
SS
2257
2258 iommu = drhd->iommu;
d9630fe9 2259 g_iommus[iommu->seq_id] = iommu;
ba395927 2260
e61d98d8
SS
2261 ret = iommu_init_domains(iommu);
2262 if (ret)
2263 goto error;
2264
ba395927
KA
2265 /*
2266 * TBD:
2267 * we could share the same root & context tables
25985edc 2268 * among all IOMMUs. Need to split it later.
ba395927
KA
2269 */
2270 ret = iommu_alloc_root_entry(iommu);
2271 if (ret) {
2272 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2273 goto error;
2274 }
4ed0d3e6 2275 if (!ecap_pass_through(iommu->ecap))
19943b0e 2276 hw_pass_through = 0;
ba395927
KA
2277 }
2278
1531a6a6
SS
2279 /*
2280 * Start from a sane iommu hardware state.
2281 */
a77b67d4
YS
2282 for_each_drhd_unit(drhd) {
2283 if (drhd->ignored)
2284 continue;
2285
2286 iommu = drhd->iommu;
1531a6a6
SS
2287
2288 /*
2289 * If the queued invalidation is already initialized by us
2290 * (for example, while enabling interrupt-remapping) then
2291 * we already have things rolling from a sane state.
2292 */
2293 if (iommu->qi)
2294 continue;
2295
2296 /*
2297 * Clear any previous faults.
2298 */
2299 dmar_fault(-1, iommu);
2300 /*
2301 * Disable queued invalidation if supported and already enabled
2302 * before OS handover.
2303 */
2304 dmar_disable_qi(iommu);
2305 }
2306
2307 for_each_drhd_unit(drhd) {
2308 if (drhd->ignored)
2309 continue;
2310
2311 iommu = drhd->iommu;
2312
a77b67d4
YS
2313 if (dmar_enable_qi(iommu)) {
2314 /*
2315 * Queued Invalidate not enabled, use Register Based
2316 * Invalidate
2317 */
2318 iommu->flush.flush_context = __iommu_flush_context;
2319 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2320 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2321 "invalidation\n",
680a7524 2322 iommu->seq_id,
b4e0f9eb 2323 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2324 } else {
2325 iommu->flush.flush_context = qi_flush_context;
2326 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2327 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2328 "invalidation\n",
680a7524 2329 iommu->seq_id,
b4e0f9eb 2330 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2331 }
2332 }
2333
19943b0e 2334 if (iommu_pass_through)
e0fc7e0b
DW
2335 iommu_identity_mapping |= IDENTMAP_ALL;
2336
19943b0e 2337#ifdef CONFIG_DMAR_BROKEN_GFX_WA
e0fc7e0b 2338 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2339#endif
e0fc7e0b
DW
2340
2341 check_tylersburg_isoch();
2342
ba395927 2343 /*
19943b0e
DW
2344 * If pass through is not set or not enabled, set up context entries for
2345 * identity mappings for rmrr, gfx, and isa, and possibly fall back to static
2346 * identity mapping if iommu_identity_mapping is set.
ba395927 2347 */
19943b0e
DW
2348 if (iommu_identity_mapping) {
2349 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2350 if (ret) {
19943b0e
DW
2351 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2352 goto error;
ba395927
KA
2353 }
2354 }
ba395927 2355 /*
19943b0e
DW
2356 * For each rmrr
2357 * for each dev attached to rmrr
2358 * do
2359 * locate drhd for dev, alloc domain for dev
2360 * allocate free domain
2361 * allocate page table entries for rmrr
2362 * if context not allocated for bus
2363 * allocate and init context
2364 * set present in root table for this bus
2365 * init context with domain, translation etc
2366 * endfor
2367 * endfor
ba395927 2368 */
19943b0e
DW
2369 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2370 for_each_rmrr_units(rmrr) {
2371 for (i = 0; i < rmrr->devices_cnt; i++) {
2372 pdev = rmrr->devices[i];
2373 /*
2374 * some BIOSes list non-existent devices in the DMAR
2375 * table.
2376 */
2377 if (!pdev)
2378 continue;
2379 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2380 if (ret)
2381 printk(KERN_ERR
2382 "IOMMU: mapping reserved region failed\n");
ba395927 2383 }
4ed0d3e6 2384 }
49a0429e 2385
19943b0e
DW
2386 iommu_prepare_isa();
2387
ba395927
KA
2388 /*
2389 * for each drhd
2390 * enable fault log
2391 * global invalidate context cache
2392 * global invalidate iotlb
2393 * enable translation
2394 */
2395 for_each_drhd_unit(drhd) {
2396 if (drhd->ignored)
2397 continue;
2398 iommu = drhd->iommu;
ba395927
KA
2399
2400 iommu_flush_write_buffer(iommu);
2401
3460a6d9
KA
2402 ret = dmar_set_interrupt(iommu);
2403 if (ret)
2404 goto error;
2405
ba395927
KA
2406 iommu_set_root_entry(iommu);
2407
4c25a2c1 2408 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2409 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2410
ba395927
KA
2411 ret = iommu_enable_translation(iommu);
2412 if (ret)
2413 goto error;
b94996c9
DW
2414
2415 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2416 }
2417
2418 return 0;
2419error:
2420 for_each_drhd_unit(drhd) {
2421 if (drhd->ignored)
2422 continue;
2423 iommu = drhd->iommu;
2424 free_iommu(iommu);
2425 }
d9630fe9 2426 kfree(g_iommus);
ba395927
KA
2427 return ret;
2428}
2429
5a5e02a6 2430/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2431static struct iova *intel_alloc_iova(struct device *dev,
2432 struct dmar_domain *domain,
2433 unsigned long nrpages, uint64_t dma_mask)
ba395927 2434{
ba395927 2435 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2436 struct iova *iova = NULL;
ba395927 2437
875764de
DW
2438 /* Restrict dma_mask to the width that the iommu can handle */
2439 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2440
2441 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2442 /*
2443 * First try to allocate an I/O virtual address in
284901a9 2444 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2445 * from a higher range
ba395927 2446 */
875764de
DW
2447 iova = alloc_iova(&domain->iovad, nrpages,
2448 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2449 if (iova)
2450 return iova;
2451 }
2452 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2453 if (unlikely(!iova)) {
2454 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2455 nrpages, pci_name(pdev));
f76aec76
KA
2456 return NULL;
2457 }
2458
2459 return iova;
2460}
2461
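/*
 * Illustrative sketch, not part of the original file: allocating an IOVA
 * range with the helper above.  As the comment notes, intel_alloc_iova()
 * takes a count of _MM_ pages, so callers convert with dma_to_mm_pfn()
 * the way __intel_map_single() does.  The buffer size is hypothetical.
 */
static struct iova *__maybe_unused example_alloc_iova(struct device *dev,
						      struct dmar_domain *domain)
{
	size_t size = 8192;					/* hypothetical buffer */
	unsigned long nrpages = aligned_nrpages(0, size);	/* VT-d pages */

	/* tries the 32-bit range first, then falls back to the full mask */
	return intel_alloc_iova(dev, domain, dma_to_mm_pfn(nrpages),
				DMA_BIT_MASK(64));
}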
147202aa 2462static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2463{
2464 struct dmar_domain *domain;
2465 int ret;
2466
2467 domain = get_domain_for_dev(pdev,
2468 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2469 if (!domain) {
2470 printk(KERN_ERR
2471 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2472 return NULL;
ba395927
KA
2473 }
2474
2475 /* make sure context mapping is ok */
5331fe6f 2476 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2477 ret = domain_context_mapping(domain, pdev,
2478 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2479 if (ret) {
2480 printk(KERN_ERR
2481 "Domain context map for %s failed",
2482 pci_name(pdev));
4fe05bbc 2483 return NULL;
f76aec76 2484 }
ba395927
KA
2485 }
2486
f76aec76
KA
2487 return domain;
2488}
2489
147202aa
DW
2490static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2491{
2492 struct device_domain_info *info;
2493
2494 /* No lock here, assumes no domain exit in normal case */
2495 info = dev->dev.archdata.iommu;
2496 if (likely(info))
2497 return info->domain;
2498
2499 return __get_valid_domain_for_dev(dev);
2500}
2501
2c2e2c38
FY
2502static int iommu_dummy(struct pci_dev *pdev)
2503{
2504 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2505}
2506
2507/* Check if the pdev needs to go through non-identity map and unmap process.*/
73676832 2508static int iommu_no_mapping(struct device *dev)
2c2e2c38 2509{
73676832 2510 struct pci_dev *pdev;
2c2e2c38
FY
2511 int found;
2512
73676832
DW
2513 if (unlikely(dev->bus != &pci_bus_type))
2514 return 1;
2515
2516 pdev = to_pci_dev(dev);
1e4c64c4
DW
2517 if (iommu_dummy(pdev))
2518 return 1;
2519
2c2e2c38 2520 if (!iommu_identity_mapping)
1e4c64c4 2521 return 0;
2c2e2c38
FY
2522
2523 found = identity_mapping(pdev);
2524 if (found) {
6941af28 2525 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2526 return 1;
2527 else {
2528 /*
2529 * A 32-bit DMA device is removed from si_domain and falls
2530 * back to non-identity mapping.
2531 */
2532 domain_remove_one_dev_info(si_domain, pdev);
2533 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2534 pci_name(pdev));
2535 return 0;
2536 }
2537 } else {
2538 /*
2539 * In the case of a 64-bit DMA device detached from a VM, the device
2540 * is put into si_domain for identity mapping.
2541 */
6941af28 2542 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2543 int ret;
5fe60f4e
DW
2544 ret = domain_add_dev_info(si_domain, pdev,
2545 hw_pass_through ?
2546 CONTEXT_TT_PASS_THROUGH :
2547 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2548 if (!ret) {
2549 printk(KERN_INFO "64bit %s uses identity mapping\n",
2550 pci_name(pdev));
2551 return 1;
2552 }
2553 }
2554 }
2555
1e4c64c4 2556 return 0;
2c2e2c38
FY
2557}
2558
bb9e6d65
FT
2559static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2560 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2561{
2562 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2563 struct dmar_domain *domain;
5b6985ce 2564 phys_addr_t start_paddr;
f76aec76
KA
2565 struct iova *iova;
2566 int prot = 0;
6865f0d1 2567 int ret;
8c11e798 2568 struct intel_iommu *iommu;
33041ec0 2569 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2570
2571 BUG_ON(dir == DMA_NONE);
2c2e2c38 2572
73676832 2573 if (iommu_no_mapping(hwdev))
6865f0d1 2574 return paddr;
f76aec76
KA
2575
2576 domain = get_valid_domain_for_dev(pdev);
2577 if (!domain)
2578 return 0;
2579
8c11e798 2580 iommu = domain_get_iommu(domain);
88cb6a74 2581 size = aligned_nrpages(paddr, size);
f76aec76 2582
5a5e02a6
DW
2583 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2584 pdev->dma_mask);
f76aec76
KA
2585 if (!iova)
2586 goto error;
2587
ba395927
KA
2588 /*
2589 * Check if DMAR supports zero-length reads on write only
2590 * mappings..
2591 */
2592 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2593 !cap_zlr(iommu->cap))
ba395927
KA
2594 prot |= DMA_PTE_READ;
2595 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2596 prot |= DMA_PTE_WRITE;
2597 /*
6865f0d1 2598 * paddr - (paddr + size) might be a partial page; we should map the whole
ba395927 2599 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 2600 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
2601 * is not a big problem
2602 */
0ab36de2 2603 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2604 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2605 if (ret)
2606 goto error;
2607
1f0ef2aa
DW
2608 /* it's a non-present to present mapping. Only flush if caching mode */
2609 if (cap_caching_mode(iommu->cap))
82653633 2610 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2611 else
8c11e798 2612 iommu_flush_write_buffer(iommu);
f76aec76 2613
03d6a246
DW
2614 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2615 start_paddr += paddr & ~PAGE_MASK;
2616 return start_paddr;
ba395927 2617
ba395927 2618error:
f76aec76
KA
2619 if (iova)
2620 __free_iova(&domain->iovad, iova);
4cf2e75d 2621 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2622 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2623 return 0;
2624}
2625
ffbbef5c
FT
2626static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2627 unsigned long offset, size_t size,
2628 enum dma_data_direction dir,
2629 struct dma_attrs *attrs)
bb9e6d65 2630{
ffbbef5c
FT
2631 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2632 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2633}
2634
5e0d2a6f 2635static void flush_unmaps(void)
2636{
80b20dd8 2637 int i, j;
5e0d2a6f 2638
5e0d2a6f 2639 timer_on = 0;
2640
2641 /* just flush them all */
2642 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2643 struct intel_iommu *iommu = g_iommus[i];
2644 if (!iommu)
2645 continue;
c42d9f32 2646
9dd2fe89
YZ
2647 if (!deferred_flush[i].next)
2648 continue;
2649
78d5f0f5
NA
2650 /* In caching mode, global flushes make emulation expensive */
2651 if (!cap_caching_mode(iommu->cap))
2652 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2653 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2654 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2655 unsigned long mask;
2656 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
2657 struct dmar_domain *domain = deferred_flush[i].domain[j];
2658
2659 /* On real hardware multiple invalidations are expensive */
2660 if (cap_caching_mode(iommu->cap))
2661 iommu_flush_iotlb_psi(iommu, domain->id,
2662 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2663 else {
2664 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2665 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2666 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2667 }
93a23a72 2668 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2669 }
9dd2fe89 2670 deferred_flush[i].next = 0;
5e0d2a6f 2671 }
2672
5e0d2a6f 2673 list_size = 0;
5e0d2a6f 2674}
2675
2676static void flush_unmaps_timeout(unsigned long data)
2677{
80b20dd8 2678 unsigned long flags;
2679
2680 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2681 flush_unmaps();
80b20dd8 2682 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2683}
2684
2685static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2686{
2687 unsigned long flags;
80b20dd8 2688 int next, iommu_id;
8c11e798 2689 struct intel_iommu *iommu;
5e0d2a6f 2690
2691 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2692 if (list_size == HIGH_WATER_MARK)
2693 flush_unmaps();
2694
8c11e798
WH
2695 iommu = domain_get_iommu(dom);
2696 iommu_id = iommu->seq_id;
c42d9f32 2697
80b20dd8 2698 next = deferred_flush[iommu_id].next;
2699 deferred_flush[iommu_id].domain[next] = dom;
2700 deferred_flush[iommu_id].iova[next] = iova;
2701 deferred_flush[iommu_id].next++;
5e0d2a6f 2702
2703 if (!timer_on) {
2704 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2705 timer_on = 1;
2706 }
2707 list_size++;
2708 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2709}
2710
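/*
 * Illustrative sketch, not part of the original file: the deferred
 * (non-strict) release path.  The caller clears the PTEs itself, exactly as
 * intel_unmap_page() below does, and then queues the IOVA; the 10ms timer
 * armed in add_unmap(), or its HIGH_WATER_MARK check, later drives
 * flush_unmaps(), which performs the IOTLB invalidation and __free_iova().
 */
static void __maybe_unused example_deferred_release(struct dmar_domain *domain,
						    struct iova *iova)
{
	add_unmap(domain, iova);
}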
ffbbef5c
FT
2711static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2712 size_t size, enum dma_data_direction dir,
2713 struct dma_attrs *attrs)
ba395927 2714{
ba395927 2715 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2716 struct dmar_domain *domain;
d794dc9b 2717 unsigned long start_pfn, last_pfn;
ba395927 2718 struct iova *iova;
8c11e798 2719 struct intel_iommu *iommu;
ba395927 2720
73676832 2721 if (iommu_no_mapping(dev))
f76aec76 2722 return;
2c2e2c38 2723
ba395927
KA
2724 domain = find_domain(pdev);
2725 BUG_ON(!domain);
2726
8c11e798
WH
2727 iommu = domain_get_iommu(domain);
2728
ba395927 2729 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2730 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2731 (unsigned long long)dev_addr))
ba395927 2732 return;
ba395927 2733
d794dc9b
DW
2734 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2735 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2736
d794dc9b
DW
2737 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2738 pci_name(pdev), start_pfn, last_pfn);
ba395927 2739
f76aec76 2740 /* clear the whole page */
d794dc9b
DW
2741 dma_pte_clear_range(domain, start_pfn, last_pfn);
2742
f76aec76 2743 /* free page tables */
d794dc9b
DW
2744 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2745
5e0d2a6f 2746 if (intel_iommu_strict) {
03d6a246 2747 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2748 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2749 /* free iova */
2750 __free_iova(&domain->iovad, iova);
2751 } else {
2752 add_unmap(domain, iova);
2753 /*
2754 * queue up the release of the unmap to save the roughly 1/6th of
2755 * the CPU time used up by the iotlb flush operation...
2756 */
5e0d2a6f 2757 }
ba395927
KA
2758}
2759
d7ab5c46
FT
2760static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2761 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2762{
2763 void *vaddr;
2764 int order;
2765
5b6985ce 2766 size = PAGE_ALIGN(size);
ba395927 2767 order = get_order(size);
e8bb910d
AW
2768
2769 if (!iommu_no_mapping(hwdev))
2770 flags &= ~(GFP_DMA | GFP_DMA32);
2771 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2772 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2773 flags |= GFP_DMA;
2774 else
2775 flags |= GFP_DMA32;
2776 }
ba395927
KA
2777
2778 vaddr = (void *)__get_free_pages(flags, order);
2779 if (!vaddr)
2780 return NULL;
2781 memset(vaddr, 0, size);
2782
bb9e6d65
FT
2783 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2784 DMA_BIDIRECTIONAL,
2785 hwdev->coherent_dma_mask);
ba395927
KA
2786 if (*dma_handle)
2787 return vaddr;
2788 free_pages((unsigned long)vaddr, order);
2789 return NULL;
2790}
2791
d7ab5c46
FT
2792static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2793 dma_addr_t dma_handle)
ba395927
KA
2794{
2795 int order;
2796
5b6985ce 2797 size = PAGE_ALIGN(size);
ba395927
KA
2798 order = get_order(size);
2799
0db9b7ae 2800 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
2801 free_pages((unsigned long)vaddr, order);
2802}
2803
d7ab5c46
FT
2804static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2805 int nelems, enum dma_data_direction dir,
2806 struct dma_attrs *attrs)
ba395927 2807{
ba395927
KA
2808 struct pci_dev *pdev = to_pci_dev(hwdev);
2809 struct dmar_domain *domain;
d794dc9b 2810 unsigned long start_pfn, last_pfn;
f76aec76 2811 struct iova *iova;
8c11e798 2812 struct intel_iommu *iommu;
ba395927 2813
73676832 2814 if (iommu_no_mapping(hwdev))
ba395927
KA
2815 return;
2816
2817 domain = find_domain(pdev);
8c11e798
WH
2818 BUG_ON(!domain);
2819
2820 iommu = domain_get_iommu(domain);
ba395927 2821
c03ab37c 2822 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
2823 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2824 (unsigned long long)sglist[0].dma_address))
f76aec76 2825 return;
f76aec76 2826
d794dc9b
DW
2827 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2828 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
2829
2830 /* clear the whole page */
d794dc9b
DW
2831 dma_pte_clear_range(domain, start_pfn, last_pfn);
2832
f76aec76 2833 /* free page tables */
d794dc9b 2834 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 2835
acea0018
DW
2836 if (intel_iommu_strict) {
2837 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2838 last_pfn - start_pfn + 1, 0);
acea0018
DW
2839 /* free iova */
2840 __free_iova(&domain->iovad, iova);
2841 } else {
2842 add_unmap(domain, iova);
2843 /*
2844 * queue up the release of the unmap to save the roughly 1/6th of
2845 * the CPU time used up by the iotlb flush operation...
2846 */
2847 }
ba395927
KA
2848}
2849
ba395927 2850static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2851 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2852{
2853 int i;
c03ab37c 2854 struct scatterlist *sg;
ba395927 2855
c03ab37c 2856 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2857 BUG_ON(!sg_page(sg));
4cf2e75d 2858 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 2859 sg->dma_length = sg->length;
ba395927
KA
2860 }
2861 return nelems;
2862}
2863
d7ab5c46
FT
2864static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2865 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 2866{
ba395927 2867 int i;
ba395927
KA
2868 struct pci_dev *pdev = to_pci_dev(hwdev);
2869 struct dmar_domain *domain;
f76aec76
KA
2870 size_t size = 0;
2871 int prot = 0;
f76aec76
KA
2872 struct iova *iova = NULL;
2873 int ret;
c03ab37c 2874 struct scatterlist *sg;
b536d24d 2875 unsigned long start_vpfn;
8c11e798 2876 struct intel_iommu *iommu;
ba395927
KA
2877
2878 BUG_ON(dir == DMA_NONE);
73676832 2879 if (iommu_no_mapping(hwdev))
c03ab37c 2880 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2881
f76aec76
KA
2882 domain = get_valid_domain_for_dev(pdev);
2883 if (!domain)
2884 return 0;
2885
8c11e798
WH
2886 iommu = domain_get_iommu(domain);
2887
b536d24d 2888 for_each_sg(sglist, sg, nelems, i)
88cb6a74 2889 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 2890
5a5e02a6
DW
2891 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2892 pdev->dma_mask);
f76aec76 2893 if (!iova) {
c03ab37c 2894 sglist->dma_length = 0;
f76aec76
KA
2895 return 0;
2896 }
2897
2898 /*
2899 * Check if DMAR supports zero-length reads on write only
2900 * mappings..
2901 */
2902 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2903 !cap_zlr(iommu->cap))
f76aec76
KA
2904 prot |= DMA_PTE_READ;
2905 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2906 prot |= DMA_PTE_WRITE;
2907
b536d24d 2908 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 2909
f532959b 2910 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
2911 if (unlikely(ret)) {
2912 /* clear the page */
2913 dma_pte_clear_range(domain, start_vpfn,
2914 start_vpfn + size - 1);
2915 /* free page tables */
2916 dma_pte_free_pagetable(domain, start_vpfn,
2917 start_vpfn + size - 1);
2918 /* free iova */
2919 __free_iova(&domain->iovad, iova);
2920 return 0;
ba395927
KA
2921 }
2922
1f0ef2aa
DW
2923 /* it's a non-present to present mapping. Only flush if caching mode */
2924 if (cap_caching_mode(iommu->cap))
82653633 2925 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 2926 else
8c11e798 2927 iommu_flush_write_buffer(iommu);
1f0ef2aa 2928
ba395927
KA
2929 return nelems;
2930}
2931
dfb805e8
FT
2932static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
2933{
2934 return !dma_addr;
2935}
2936
160c1d8e 2937struct dma_map_ops intel_dma_ops = {
ba395927
KA
2938 .alloc_coherent = intel_alloc_coherent,
2939 .free_coherent = intel_free_coherent,
ba395927
KA
2940 .map_sg = intel_map_sg,
2941 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
2942 .map_page = intel_map_page,
2943 .unmap_page = intel_unmap_page,
dfb805e8 2944 .mapping_error = intel_mapping_error,
ba395927
KA
2945};
2946
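/*
 * Illustrative sketch, not part of the original file: how a driver reaches
 * the operations above.  Once intel_iommu_init() has set dma_ops to
 * &intel_dma_ops, a streaming mapping made through the generic DMA API is
 * dispatched to intel_map_page()/intel_unmap_page().  The device, buffer
 * and length are hypothetical.
 */
static int __maybe_unused example_streaming_dma(struct pci_dev *pdev,
						void *buf, size_t len)
{
	dma_addr_t handle;

	/* ends up in __intel_map_single() via intel_map_page() */
	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, handle))
		return -ENOMEM;

	/* ... program 'handle' into the hardware and wait for the DMA ... */

	/* ends up in intel_unmap_page(); the IOTLB flush may be deferred */
	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
	return 0;
}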
2947static inline int iommu_domain_cache_init(void)
2948{
2949 int ret = 0;
2950
2951 iommu_domain_cache = kmem_cache_create("iommu_domain",
2952 sizeof(struct dmar_domain),
2953 0,
2954 SLAB_HWCACHE_ALIGN,
2955
2956 NULL);
2957 if (!iommu_domain_cache) {
2958 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2959 ret = -ENOMEM;
2960 }
2961
2962 return ret;
2963}
2964
2965static inline int iommu_devinfo_cache_init(void)
2966{
2967 int ret = 0;
2968
2969 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2970 sizeof(struct device_domain_info),
2971 0,
2972 SLAB_HWCACHE_ALIGN,
ba395927
KA
2973 NULL);
2974 if (!iommu_devinfo_cache) {
2975 printk(KERN_ERR "Couldn't create devinfo cache\n");
2976 ret = -ENOMEM;
2977 }
2978
2979 return ret;
2980}
2981
2982static inline int iommu_iova_cache_init(void)
2983{
2984 int ret = 0;
2985
2986 iommu_iova_cache = kmem_cache_create("iommu_iova",
2987 sizeof(struct iova),
2988 0,
2989 SLAB_HWCACHE_ALIGN,
ba395927
KA
2990 NULL);
2991 if (!iommu_iova_cache) {
2992 printk(KERN_ERR "Couldn't create iova cache\n");
2993 ret = -ENOMEM;
2994 }
2995
2996 return ret;
2997}
2998
2999static int __init iommu_init_mempool(void)
3000{
3001 int ret;
3002 ret = iommu_iova_cache_init();
3003 if (ret)
3004 return ret;
3005
3006 ret = iommu_domain_cache_init();
3007 if (ret)
3008 goto domain_error;
3009
3010 ret = iommu_devinfo_cache_init();
3011 if (!ret)
3012 return ret;
3013
3014 kmem_cache_destroy(iommu_domain_cache);
3015domain_error:
3016 kmem_cache_destroy(iommu_iova_cache);
3017
3018 return -ENOMEM;
3019}
3020
3021static void __init iommu_exit_mempool(void)
3022{
3023 kmem_cache_destroy(iommu_devinfo_cache);
3024 kmem_cache_destroy(iommu_domain_cache);
3025 kmem_cache_destroy(iommu_iova_cache);
3026
3027}
3028
556ab45f
DW
3029static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3030{
3031 struct dmar_drhd_unit *drhd;
3032 u32 vtbar;
3033 int rc;
3034
3035 /* We know that this device on this chipset has its own IOMMU.
3036 * If we find it under a different IOMMU, then the BIOS is lying
3037 * to us. Hope that the IOMMU for this device is actually
3038 * disabled, and it needs no translation...
3039 */
3040 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3041 if (rc) {
3042 /* "can't" happen */
3043 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3044 return;
3045 }
3046 vtbar &= 0xffff0000;
3047
3048 /* we know that this iommu should be at offset 0xa000 from vtbar */
3049 drhd = dmar_find_matched_drhd_unit(pdev);
3050 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3051 TAINT_FIRMWARE_WORKAROUND,
3052 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3053 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3054}
3055DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3056
ba395927
KA
3057static void __init init_no_remapping_devices(void)
3058{
3059 struct dmar_drhd_unit *drhd;
3060
3061 for_each_drhd_unit(drhd) {
3062 if (!drhd->include_all) {
3063 int i;
3064 for (i = 0; i < drhd->devices_cnt; i++)
3065 if (drhd->devices[i] != NULL)
3066 break;
3067 /* ignore DMAR unit if no pci devices exist */
3068 if (i == drhd->devices_cnt)
3069 drhd->ignored = 1;
3070 }
3071 }
3072
3073 if (dmar_map_gfx)
3074 return;
3075
3076 for_each_drhd_unit(drhd) {
3077 int i;
3078 if (drhd->ignored || drhd->include_all)
3079 continue;
3080
3081 for (i = 0; i < drhd->devices_cnt; i++)
3082 if (drhd->devices[i] &&
3083 !IS_GFX_DEVICE(drhd->devices[i]))
3084 break;
3085
3086 if (i < drhd->devices_cnt)
3087 continue;
3088
3089 /* bypass IOMMU if it is just for gfx devices */
3090 drhd->ignored = 1;
3091 for (i = 0; i < drhd->devices_cnt; i++) {
3092 if (!drhd->devices[i])
3093 continue;
358dd8ac 3094 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3095 }
3096 }
3097}
3098
f59c7b69
FY
3099#ifdef CONFIG_SUSPEND
3100static int init_iommu_hw(void)
3101{
3102 struct dmar_drhd_unit *drhd;
3103 struct intel_iommu *iommu = NULL;
3104
3105 for_each_active_iommu(iommu, drhd)
3106 if (iommu->qi)
3107 dmar_reenable_qi(iommu);
3108
3109 for_each_active_iommu(iommu, drhd) {
3110 iommu_flush_write_buffer(iommu);
3111
3112 iommu_set_root_entry(iommu);
3113
3114 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3115 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3116 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3117 DMA_TLB_GLOBAL_FLUSH);
f59c7b69 3118 iommu_enable_translation(iommu);
b94996c9 3119 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3120 }
3121
3122 return 0;
3123}
3124
3125static void iommu_flush_all(void)
3126{
3127 struct dmar_drhd_unit *drhd;
3128 struct intel_iommu *iommu;
3129
3130 for_each_active_iommu(iommu, drhd) {
3131 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3132 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3133 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3134 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3135 }
3136}
3137
134fac3f 3138static int iommu_suspend(void)
f59c7b69
FY
3139{
3140 struct dmar_drhd_unit *drhd;
3141 struct intel_iommu *iommu = NULL;
3142 unsigned long flag;
3143
3144 for_each_active_iommu(iommu, drhd) {
3145 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3146 GFP_ATOMIC);
3147 if (!iommu->iommu_state)
3148 goto nomem;
3149 }
3150
3151 iommu_flush_all();
3152
3153 for_each_active_iommu(iommu, drhd) {
3154 iommu_disable_translation(iommu);
3155
3156 spin_lock_irqsave(&iommu->register_lock, flag);
3157
3158 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3159 readl(iommu->reg + DMAR_FECTL_REG);
3160 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3161 readl(iommu->reg + DMAR_FEDATA_REG);
3162 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3163 readl(iommu->reg + DMAR_FEADDR_REG);
3164 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3165 readl(iommu->reg + DMAR_FEUADDR_REG);
3166
3167 spin_unlock_irqrestore(&iommu->register_lock, flag);
3168 }
3169 return 0;
3170
3171nomem:
3172 for_each_active_iommu(iommu, drhd)
3173 kfree(iommu->iommu_state);
3174
3175 return -ENOMEM;
3176}
3177
134fac3f 3178static void iommu_resume(void)
f59c7b69
FY
3179{
3180 struct dmar_drhd_unit *drhd;
3181 struct intel_iommu *iommu = NULL;
3182 unsigned long flag;
3183
3184 if (init_iommu_hw()) {
3185 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3186 return;
f59c7b69
FY
3187 }
3188
3189 for_each_active_iommu(iommu, drhd) {
3190
3191 spin_lock_irqsave(&iommu->register_lock, flag);
3192
3193 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3194 iommu->reg + DMAR_FECTL_REG);
3195 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3196 iommu->reg + DMAR_FEDATA_REG);
3197 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3198 iommu->reg + DMAR_FEADDR_REG);
3199 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3200 iommu->reg + DMAR_FEUADDR_REG);
3201
3202 spin_unlock_irqrestore(&iommu->register_lock, flag);
3203 }
3204
3205 for_each_active_iommu(iommu, drhd)
3206 kfree(iommu->iommu_state);
f59c7b69
FY
3207}
3208
134fac3f 3209static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3210 .resume = iommu_resume,
3211 .suspend = iommu_suspend,
3212};
3213
134fac3f 3214static void __init init_iommu_pm_ops(void)
f59c7b69 3215{
134fac3f 3216 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3217}
3218
3219#else
134fac3f 3220static inline void init_iommu_pm_ops(void) { }
f59c7b69
FY
3221#endif /* CONFIG_SUSPEND */
3222
99dcaded
FY
3223/*
3224 * Here we only respond to the action of a device being unbound from its driver.
3225 *
3226 * A newly added device is not attached to its DMAR domain here yet. That will
3227 * happen when the device is first mapped to an iova.
3228 */
3229static int device_notifier(struct notifier_block *nb,
3230 unsigned long action, void *data)
3231{
3232 struct device *dev = data;
3233 struct pci_dev *pdev = to_pci_dev(dev);
3234 struct dmar_domain *domain;
3235
44cd613c
DW
3236 if (iommu_no_mapping(dev))
3237 return 0;
3238
99dcaded
FY
3239 domain = find_domain(pdev);
3240 if (!domain)
3241 return 0;
3242
3243 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through)
3244 domain_remove_one_dev_info(domain, pdev);
3245
3246 return 0;
3247}
3248
3249static struct notifier_block device_nb = {
3250 .notifier_call = device_notifier,
3251};
3252
ba395927
KA
3253int __init intel_iommu_init(void)
3254{
3255 int ret = 0;
a59b50e9 3256 int force_on = 0;
ba395927 3257
a59b50e9
JC
3258 /* VT-d is required for a TXT/tboot launch, so enforce that */
3259 force_on = tboot_force_iommu();
3260
3261 if (dmar_table_init()) {
3262 if (force_on)
3263 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3264 return -ENODEV;
a59b50e9 3265 }
ba395927 3266
a59b50e9
JC
3267 if (dmar_dev_scope_init()) {
3268 if (force_on)
3269 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3270 return -ENODEV;
a59b50e9 3271 }
1886e8a9 3272
2ae21010
SS
3273 /*
3274 * Check the need for DMA-remapping initialization now.
3275 * The above initialization will also be used by interrupt-remapping.
3276 */
75f1cdf1 3277 if (no_iommu || dmar_disabled)
2ae21010
SS
3278 return -ENODEV;
3279
ba395927
KA
3280 iommu_init_mempool();
3281 dmar_init_reserved_ranges();
3282
3283 init_no_remapping_devices();
3284
3285 ret = init_dmars();
3286 if (ret) {
a59b50e9
JC
3287 if (force_on)
3288 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3289 printk(KERN_ERR "IOMMU: dmar init failed\n");
3290 put_iova_domain(&reserved_iova_list);
3291 iommu_exit_mempool();
3292 return ret;
3293 }
3294 printk(KERN_INFO
3295 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3296
5e0d2a6f 3297 init_timer(&unmap_timer);
75f1cdf1
FT
3298#ifdef CONFIG_SWIOTLB
3299 swiotlb = 0;
3300#endif
19943b0e 3301 dma_ops = &intel_dma_ops;
4ed0d3e6 3302
134fac3f 3303 init_iommu_pm_ops();
a8bcbb0d
JR
3304
3305 register_iommu(&intel_iommu_ops);
3306
99dcaded
FY
3307 bus_register_notifier(&pci_bus_type, &device_nb);
3308
ba395927
KA
3309 return 0;
3310}
e820482c 3311
3199aa6b
HW
3312static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3313 struct pci_dev *pdev)
3314{
3315 struct pci_dev *tmp, *parent;
3316
3317 if (!iommu || !pdev)
3318 return;
3319
3320 /* dependent device detach */
3321 tmp = pci_find_upstream_pcie_bridge(pdev);
3322 /* Secondary interface's bus number and devfn 0 */
3323 if (tmp) {
3324 parent = pdev->bus->self;
3325 while (parent != tmp) {
3326 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3327 parent->devfn);
3199aa6b
HW
3328 parent = parent->bus->self;
3329 }
45e829ea 3330 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3331 iommu_detach_dev(iommu,
3332 tmp->subordinate->number, 0);
3333 else /* this is a legacy PCI bridge */
276dbf99
DW
3334 iommu_detach_dev(iommu, tmp->bus->number,
3335 tmp->devfn);
3199aa6b
HW
3336 }
3337}
3338
2c2e2c38 3339static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3340 struct pci_dev *pdev)
3341{
3342 struct device_domain_info *info;
3343 struct intel_iommu *iommu;
3344 unsigned long flags;
3345 int found = 0;
3346 struct list_head *entry, *tmp;
3347
276dbf99
DW
3348 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3349 pdev->devfn);
c7151a8d
WH
3350 if (!iommu)
3351 return;
3352
3353 spin_lock_irqsave(&device_domain_lock, flags);
3354 list_for_each_safe(entry, tmp, &domain->devices) {
3355 info = list_entry(entry, struct device_domain_info, link);
276dbf99 3356 /* No need to compare PCI domain; it has to be the same */
c7151a8d
WH
3357 if (info->bus == pdev->bus->number &&
3358 info->devfn == pdev->devfn) {
3359 list_del(&info->link);
3360 list_del(&info->global);
3361 if (info->dev)
3362 info->dev->dev.archdata.iommu = NULL;
3363 spin_unlock_irqrestore(&device_domain_lock, flags);
3364
93a23a72 3365 iommu_disable_dev_iotlb(info);
c7151a8d 3366 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3367 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3368 free_devinfo_mem(info);
3369
3370 spin_lock_irqsave(&device_domain_lock, flags);
3371
3372 if (found)
3373 break;
3374 else
3375 continue;
3376 }
3377
3378 /* if there are no other devices under the same iommu
3379 * owned by this domain, clear this iommu in iommu_bmp and
3380 * update the iommu count and coherency
3381 */
276dbf99
DW
3382 if (iommu == device_to_iommu(info->segment, info->bus,
3383 info->devfn))
c7151a8d
WH
3384 found = 1;
3385 }
3386
3387 if (found == 0) {
3388 unsigned long tmp_flags;
3389 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3390 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3391 domain->iommu_count--;
58c610bd 3392 domain_update_iommu_cap(domain);
c7151a8d
WH
3393 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3394 }
3395
3396 spin_unlock_irqrestore(&device_domain_lock, flags);
3397}
3398
3399static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3400{
3401 struct device_domain_info *info;
3402 struct intel_iommu *iommu;
3403 unsigned long flags1, flags2;
3404
3405 spin_lock_irqsave(&device_domain_lock, flags1);
3406 while (!list_empty(&domain->devices)) {
3407 info = list_entry(domain->devices.next,
3408 struct device_domain_info, link);
3409 list_del(&info->link);
3410 list_del(&info->global);
3411 if (info->dev)
3412 info->dev->dev.archdata.iommu = NULL;
3413
3414 spin_unlock_irqrestore(&device_domain_lock, flags1);
3415
93a23a72 3416 iommu_disable_dev_iotlb(info);
276dbf99 3417 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3418 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3419 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3420
3421 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3422 * and capabilities
c7151a8d
WH
3423 */
3424 spin_lock_irqsave(&domain->iommu_lock, flags2);
3425 if (test_and_clear_bit(iommu->seq_id,
3426 &domain->iommu_bmp)) {
3427 domain->iommu_count--;
58c610bd 3428 domain_update_iommu_cap(domain);
c7151a8d
WH
3429 }
3430 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3431
3432 free_devinfo_mem(info);
3433 spin_lock_irqsave(&device_domain_lock, flags1);
3434 }
3435 spin_unlock_irqrestore(&device_domain_lock, flags1);
3436}
3437
5e98c4b1
WH
3438/* domain id for a virtual machine; it won't be set in a context entry */
3439static unsigned long vm_domid;
3440
3441static struct dmar_domain *iommu_alloc_vm_domain(void)
3442{
3443 struct dmar_domain *domain;
3444
3445 domain = alloc_domain_mem();
3446 if (!domain)
3447 return NULL;
3448
3449 domain->id = vm_domid++;
4c923d47 3450 domain->nid = -1;
5e98c4b1
WH
3451 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3452 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3453
3454 return domain;
3455}
3456
2c2e2c38 3457static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3458{
3459 int adjust_width;
3460
3461 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3462 spin_lock_init(&domain->iommu_lock);
3463
3464 domain_reserve_special_ranges(domain);
3465
3466 /* calculate AGAW */
3467 domain->gaw = guest_width;
3468 adjust_width = guestwidth_to_adjustwidth(guest_width);
3469 domain->agaw = width_to_agaw(adjust_width);
3470
3471 INIT_LIST_HEAD(&domain->devices);
3472
3473 domain->iommu_count = 0;
3474 domain->iommu_coherency = 0;
c5b15255 3475 domain->iommu_snooping = 0;
fe40f1e0 3476 domain->max_addr = 0;
4c923d47 3477 domain->nid = -1;
5e98c4b1
WH
3478
3479 /* always allocate the top pgd */
4c923d47 3480 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3481 if (!domain->pgd)
3482 return -ENOMEM;
3483 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3484 return 0;
3485}
3486
3487static void iommu_free_vm_domain(struct dmar_domain *domain)
3488{
3489 unsigned long flags;
3490 struct dmar_drhd_unit *drhd;
3491 struct intel_iommu *iommu;
3492 unsigned long i;
3493 unsigned long ndomains;
3494
3495 for_each_drhd_unit(drhd) {
3496 if (drhd->ignored)
3497 continue;
3498 iommu = drhd->iommu;
3499
3500 ndomains = cap_ndoms(iommu->cap);
a45946ab 3501 for_each_set_bit(i, iommu->domain_ids, ndomains) {
5e98c4b1
WH
3502 if (iommu->domains[i] == domain) {
3503 spin_lock_irqsave(&iommu->lock, flags);
3504 clear_bit(i, iommu->domain_ids);
3505 iommu->domains[i] = NULL;
3506 spin_unlock_irqrestore(&iommu->lock, flags);
3507 break;
3508 }
5e98c4b1
WH
3509 }
3510 }
3511}
3512
3513static void vm_domain_exit(struct dmar_domain *domain)
3514{
5e98c4b1
WH
3515 /* Domain 0 is reserved, so don't process it */
3516 if (!domain)
3517 return;
3518
3519 vm_domain_remove_all_dev_info(domain);
3520 /* destroy iovas */
3521 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3522
3523 /* clear ptes */
595badf5 3524 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3525
3526 /* free page tables */
d794dc9b 3527 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3528
3529 iommu_free_vm_domain(domain);
3530 free_domain_mem(domain);
3531}
3532
5d450806 3533static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3534{
5d450806 3535 struct dmar_domain *dmar_domain;
38717946 3536
5d450806
JR
3537 dmar_domain = iommu_alloc_vm_domain();
3538 if (!dmar_domain) {
38717946 3539 printk(KERN_ERR
5d450806
JR
3540 "intel_iommu_domain_init: dmar_domain == NULL\n");
3541 return -ENOMEM;
38717946 3542 }
2c2e2c38 3543 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3544 printk(KERN_ERR
5d450806
JR
3545 "intel_iommu_domain_init() failed\n");
3546 vm_domain_exit(dmar_domain);
3547 return -ENOMEM;
38717946 3548 }
5d450806 3549 domain->priv = dmar_domain;
faa3d6f5 3550
5d450806 3551 return 0;
38717946 3552}
38717946 3553
5d450806 3554static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3555{
5d450806
JR
3556 struct dmar_domain *dmar_domain = domain->priv;
3557
3558 domain->priv = NULL;
3559 vm_domain_exit(dmar_domain);
38717946 3560}
38717946 3561
4c5478c9
JR
3562static int intel_iommu_attach_device(struct iommu_domain *domain,
3563 struct device *dev)
38717946 3564{
4c5478c9
JR
3565 struct dmar_domain *dmar_domain = domain->priv;
3566 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3567 struct intel_iommu *iommu;
3568 int addr_width;
faa3d6f5
WH
3569
3570 /* normally pdev is not mapped */
3571 if (unlikely(domain_context_mapped(pdev))) {
3572 struct dmar_domain *old_domain;
3573
3574 old_domain = find_domain(pdev);
3575 if (old_domain) {
2c2e2c38
FY
3576 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3577 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3578 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3579 else
3580 domain_remove_dev_info(old_domain);
3581 }
3582 }
3583
276dbf99
DW
3584 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3585 pdev->devfn);
fe40f1e0
WH
3586 if (!iommu)
3587 return -ENODEV;
3588
3589 /* check if this iommu agaw is sufficient for max mapped address */
3590 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
3591 if (addr_width > cap_mgaw(iommu->cap))
3592 addr_width = cap_mgaw(iommu->cap);
3593
3594 if (dmar_domain->max_addr > (1LL << addr_width)) {
3595 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3596 "sufficient for the mapped address (%llx)\n",
a99c47a2 3597 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
3598 return -EFAULT;
3599 }
a99c47a2
TL
3600 dmar_domain->gaw = addr_width;
3601
3602 /*
3603 * Knock out extra levels of page tables if necessary
3604 */
3605 while (iommu->agaw < dmar_domain->agaw) {
3606 struct dma_pte *pte;
3607
3608 pte = dmar_domain->pgd;
3609 if (dma_pte_present(pte)) {
3610 free_pgtable_page(dmar_domain->pgd);
25cbff16
SY
3611 dmar_domain->pgd = (struct dma_pte *)
3612 phys_to_virt(dma_pte_addr(pte));
a99c47a2
TL
3613 }
3614 dmar_domain->agaw--;
3615 }
fe40f1e0 3616
5fe60f4e 3617 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3618}
38717946 3619
4c5478c9
JR
3620static void intel_iommu_detach_device(struct iommu_domain *domain,
3621 struct device *dev)
38717946 3622{
4c5478c9
JR
3623 struct dmar_domain *dmar_domain = domain->priv;
3624 struct pci_dev *pdev = to_pci_dev(dev);
3625
2c2e2c38 3626 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3627}

static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   int gfp_order, int iommu_prot)
{
	struct dmar_domain *dmar_domain = domain->priv;
	u64 max_addr;
	int prot = 0;
	size_t size;
	int ret;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	size = PAGE_SIZE << gfp_order;
	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			printk(KERN_ERR "%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}
	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				 hpa >> VTD_PAGE_SHIFT, size, prot);
	return ret;
}
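
/*
 * Usage note (illustrative, not called by this driver): this callback is
 * handed a page order, not a byte count, so a caller mapping a 2MiB region
 * at a 2MiB-aligned IOVA would pass gfp_order = 9 (512 * 4KiB pages) with,
 * for example, IOMMU_READ | IOMMU_WRITE as iommu_prot. IOMMU_CACHE is only
 * turned into DMA_PTE_SNP when the domain advertises snoop control
 * (dmar_domain->iommu_snooping); otherwise it is silently ignored.
 */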

static int intel_iommu_unmap(struct iommu_domain *domain,
			     unsigned long iova, int gfp_order)
{
	struct dmar_domain *dmar_domain = domain->priv;
	size_t size = PAGE_SIZE << gfp_order;

	dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
			    (iova + size - 1) >> VTD_PAGE_SHIFT);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	return gfp_order;
}
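
/*
 * Unmap note: only the page-table entries covering the requested order are
 * cleared, and max_addr is trimmed when the unmapped range was the highest
 * one in the domain. The returned value reports the order that was actually
 * unmapped, which here is simply the order that was asked for.
 */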

static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    unsigned long iova)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct dma_pte *pte;
	u64 phys = 0;

	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
	if (pte)
		phys = dma_pte_addr(pte);

	return phys;
}
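
/*
 * iova_to_phys returns the page-aligned physical address backing 'iova'
 * (dma_pte_addr() masks off the low-order permission bits), or 0 when
 * nothing is mapped there; a caller that wants a byte address must add the
 * offset within the page itself.
 */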

static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
				      unsigned long cap)
{
	struct dmar_domain *dmar_domain = domain->priv;

	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return dmar_domain->iommu_snooping;
	if (cap == IOMMU_CAP_INTR_REMAP)
		return intr_remapping_enabled;

	return 0;
}
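
/*
 * A hypothetical client (KVM-style device assignment, for instance) would
 * use this hook through iommu_domain_has_cap() to decide whether it may
 * rely on the IOMMU enforcing cache coherency, and only pass IOMMU_CACHE
 * to iommu_map() when IOMMU_CAP_CACHE_COHERENCY is reported.
 */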

static struct iommu_ops intel_iommu_ops = {
	.domain_init	= intel_iommu_domain_init,
	.domain_destroy	= intel_iommu_domain_destroy,
	.attach_dev	= intel_iommu_attach_device,
	.detach_dev	= intel_iommu_detach_device,
	.map		= intel_iommu_map,
	.unmap		= intel_iommu_unmap,
	.iova_to_phys	= intel_iommu_iova_to_phys,
	.domain_has_cap	= intel_iommu_domain_has_cap,
};
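
/*
 * Illustrative sketch only (not part of this driver, compiled out): how a
 * client would reach the ops above through the generic IOMMU API of this
 * kernel generation, assuming <linux/iommu.h> still exports the order-based
 * iommu_map()/iommu_unmap() that intel_iommu_ops implements. The function
 * name and error handling are hypothetical.
 */
#if 0
static int example_assign_device(struct device *dev, unsigned long iova,
				 phys_addr_t paddr)
{
	struct iommu_domain *domain;
	int ret;

	domain = iommu_domain_alloc();		/* -> intel_iommu_domain_init */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);	/* -> intel_iommu_attach_device */
	if (ret)
		goto out_free;

	/* Map a single 4KiB page (order 0), read/write. */
	ret = iommu_map(domain, iova, paddr, 0, IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;
	return 0;

out_detach:
	iommu_detach_device(domain, dev);	/* -> intel_iommu_detach_device */
out_free:
	iommu_domain_free(domain);		/* -> intel_iommu_domain_destroy */
	return ret;
}
#endif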

static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
{
	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it:
	 */
	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;

	/* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
	if (dev->revision == 0x07) {
		printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
		dmar_map_gfx = 0;
	}
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
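
/*
 * The header fixup above runs quirk_iommu_rwbf() when PCI device 8086:2a40
 * (the Mobile 4 Series MCH mentioned in the comment) is enumerated, so
 * rwbf_quirk is expected to be set by the time the DMAR code consults it
 * to decide whether write-buffer flushing is required.
 */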

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
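
/*
 * Bits 8-11 of the GGC config register (layout inferred from the macro
 * values above) describe how much graphics memory the BIOS set aside and
 * whether a VT-d capable ("VT enabled") allocation was made. The quirk
 * below checks that bit on Ironlake/Calpella-era host bridges and gives up
 * on translating graphics DMA when the BIOS left no shadow GTT.
 */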
static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);

/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;
	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
	       vtisochctrl);
}
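
/*
 * When the broken configuration is detected, IDENTMAP_AZALIA makes the
 * identity-mapping setup driven from init_dmars() give the Azalia audio
 * device a 1:1 mapping, which appears to be the intended workaround for
 * the TLB-less ISOCH unit (judging by the flag name and the warning above).
 */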