dmar: remove the quirk which disables dma-remapping when intr-remapping enabled
drivers/pci/intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 */
22
23#include <linux/init.h>
24#include <linux/bitmap.h>
5e0d2a6f 25#include <linux/debugfs.h>
26#include <linux/slab.h>
27#include <linux/irq.h>
28#include <linux/interrupt.h>
29#include <linux/sysdev.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
36#include <linux/iova.h>
37#include <linux/intel-iommu.h>
38#include <asm/proto.h> /* force_iommu in this header in x86-64*/
39#include <asm/cacheflush.h>
46a7fa27 40#include <asm/iommu.h>
41#include "pci.h"
42
43#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
44#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
45
46#define IOAPIC_RANGE_START (0xfee00000)
47#define IOAPIC_RANGE_END (0xfeefffff)
48#define IOVA_START_ADDR (0x1000)
49
50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
51
52#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
53
5e0d2a6f 54
55static void flush_unmaps_timeout(unsigned long data);
56
57DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
58
80b20dd8 59#define HIGH_WATER_MARK 250
60struct deferred_flush_tables {
61 int next;
62 struct iova *iova[HIGH_WATER_MARK];
63 struct dmar_domain *domain[HIGH_WATER_MARK];
64};
65
66static struct deferred_flush_tables *deferred_flush;
67
5e0d2a6f 68/* bitmap for indexing intel_iommus */
5e0d2a6f 69static int g_num_of_iommus;
70
71static DEFINE_SPINLOCK(async_umap_flush_lock);
72static LIST_HEAD(unmaps_to_do);
73
74static int timer_on;
75static long list_size;
5e0d2a6f 76
77static void domain_remove_dev_info(struct dmar_domain *domain);
78
2ae21010 79int dmar_disabled;
ba395927 80static int __initdata dmar_map_gfx = 1;
7d3b03ce 81static int dmar_forcedac;
5e0d2a6f 82static int intel_iommu_strict;
83
84#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
85static DEFINE_SPINLOCK(device_domain_lock);
86static LIST_HEAD(device_domain_list);
87
88static int __init intel_iommu_setup(char *str)
89{
90 if (!str)
91 return -EINVAL;
92 while (*str) {
93 if (!strncmp(str, "off", 3)) {
94 dmar_disabled = 1;
95 printk(KERN_INFO"Intel-IOMMU: disabled\n");
96 } else if (!strncmp(str, "igfx_off", 8)) {
97 dmar_map_gfx = 0;
98 printk(KERN_INFO
99 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 100 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 101 printk(KERN_INFO
102 "Intel-IOMMU: Forcing DAC for PCI devices\n");
103 dmar_forcedac = 1;
5e0d2a6f 104 } else if (!strncmp(str, "strict", 6)) {
105 printk(KERN_INFO
106 "Intel-IOMMU: disable batched IOTLB flush\n");
107 intel_iommu_strict = 1;
108 }
109
110 str += strcspn(str, ",");
111 while (*str == ',')
112 str++;
113 }
114 return 0;
115}
116__setup("intel_iommu=", intel_iommu_setup);
117
118static struct kmem_cache *iommu_domain_cache;
119static struct kmem_cache *iommu_devinfo_cache;
120static struct kmem_cache *iommu_iova_cache;
121
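/*
 * Allocate from a kmem cache with PF_MEMALLOC temporarily set, so the
 * GFP_ATOMIC allocation may dip into emergency reserves; mapping setup
 * can run in atomic, low-memory contexts.
 */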
122static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
123{
124 unsigned int flags;
125 void *vaddr;
126
127 /* trying to avoid low memory issues */
128 flags = current->flags & PF_MEMALLOC;
129 current->flags |= PF_MEMALLOC;
130 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
131 current->flags &= (~PF_MEMALLOC | flags);
132 return vaddr;
133}
134
135
136static inline void *alloc_pgtable_page(void)
137{
138 unsigned int flags;
139 void *vaddr;
140
141 /* trying to avoid low memory issues */
142 flags = current->flags & PF_MEMALLOC;
143 current->flags |= PF_MEMALLOC;
144 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
145 current->flags &= (~PF_MEMALLOC | flags);
146 return vaddr;
147}
148
149static inline void free_pgtable_page(void *vaddr)
150{
151 free_page((unsigned long)vaddr);
152}
153
154static inline void *alloc_domain_mem(void)
155{
eb3fa7cb 156 return iommu_kmem_cache_alloc(iommu_domain_cache);
157}
158
38717946 159static void free_domain_mem(void *vaddr)
160{
161 kmem_cache_free(iommu_domain_cache, vaddr);
162}
163
164static inline void * alloc_devinfo_mem(void)
165{
eb3fa7cb 166 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
167}
168
169static inline void free_devinfo_mem(void *vaddr)
170{
171 kmem_cache_free(iommu_devinfo_cache, vaddr);
172}
173
174struct iova *alloc_iova_mem(void)
175{
eb3fa7cb 176 return iommu_kmem_cache_alloc(iommu_iova_cache);
177}
178
179void free_iova_mem(struct iova *iova)
180{
181 kmem_cache_free(iommu_iova_cache, iova);
182}
183
184/* Gets context entry for a given bus and devfn */
185static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
186 u8 bus, u8 devfn)
187{
188 struct root_entry *root;
189 struct context_entry *context;
190 unsigned long phy_addr;
191 unsigned long flags;
192
193 spin_lock_irqsave(&iommu->lock, flags);
194 root = &iommu->root_entry[bus];
195 context = get_context_addr_from_root(root);
196 if (!context) {
197 context = (struct context_entry *)alloc_pgtable_page();
198 if (!context) {
199 spin_unlock_irqrestore(&iommu->lock, flags);
200 return NULL;
201 }
202 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
203 phy_addr = virt_to_phys((void *)context);
204 set_root_value(root, phy_addr);
205 set_root_present(root);
206 __iommu_flush_cache(iommu, root, sizeof(*root));
207 }
208 spin_unlock_irqrestore(&iommu->lock, flags);
209 return &context[devfn];
210}
211
212static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
213{
214 struct root_entry *root;
215 struct context_entry *context;
216 int ret;
217 unsigned long flags;
218
219 spin_lock_irqsave(&iommu->lock, flags);
220 root = &iommu->root_entry[bus];
221 context = get_context_addr_from_root(root);
222 if (!context) {
223 ret = 0;
224 goto out;
225 }
226 ret = context_present(context[devfn]);
227out:
228 spin_unlock_irqrestore(&iommu->lock, flags);
229 return ret;
230}
231
232static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
233{
234 struct root_entry *root;
235 struct context_entry *context;
236 unsigned long flags;
237
238 spin_lock_irqsave(&iommu->lock, flags);
239 root = &iommu->root_entry[bus];
240 context = get_context_addr_from_root(root);
241 if (context) {
242 context_clear_entry(context[devfn]);
243 __iommu_flush_cache(iommu, &context[devfn], \
244 sizeof(*context));
245 }
246 spin_unlock_irqrestore(&iommu->lock, flags);
247}
248
249static void free_context_table(struct intel_iommu *iommu)
250{
251 struct root_entry *root;
252 int i;
253 unsigned long flags;
254 struct context_entry *context;
255
256 spin_lock_irqsave(&iommu->lock, flags);
257 if (!iommu->root_entry) {
258 goto out;
259 }
260 for (i = 0; i < ROOT_ENTRY_NR; i++) {
261 root = &iommu->root_entry[i];
262 context = get_context_addr_from_root(root);
263 if (context)
264 free_pgtable_page(context);
265 }
266 free_pgtable_page(iommu->root_entry);
267 iommu->root_entry = NULL;
268out:
269 spin_unlock_irqrestore(&iommu->lock, flags);
270}
271
272/* page table handling */
273#define LEVEL_STRIDE (9)
274#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
275
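/*
 * Page-table geometry: each level decodes LEVEL_STRIDE (9) bits above
 * the 4K page offset, so an AGAW of N means N + 2 levels and an
 * address width of 30 + 9 * N bits.
 */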
276static inline int agaw_to_level(int agaw)
277{
278 return agaw + 2;
279}
280
281static inline int agaw_to_width(int agaw)
282{
283 return 30 + agaw * LEVEL_STRIDE;
284
285}
286
287static inline int width_to_agaw(int width)
288{
289 return (width - 30) / LEVEL_STRIDE;
290}
291
292static inline unsigned int level_to_offset_bits(int level)
293{
294 return (12 + (level - 1) * LEVEL_STRIDE);
295}
296
297static inline int address_level_offset(u64 addr, int level)
298{
299 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
300}
301
302static inline u64 level_mask(int level)
303{
304 return ((u64)-1 << level_to_offset_bits(level));
305}
306
307static inline u64 level_size(int level)
308{
309 return ((u64)1 << level_to_offset_bits(level));
310}
311
312static inline u64 align_to_level(u64 addr, int level)
313{
314 return ((addr + level_size(level) - 1) & level_mask(level));
315}
316
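/*
 * Walk the page-table hierarchy for @addr, allocating any missing
 * intermediate tables, and return the level-1 (4K leaf) PTE.
 */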
317static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
318{
319 int addr_width = agaw_to_width(domain->agaw);
320 struct dma_pte *parent, *pte = NULL;
321 int level = agaw_to_level(domain->agaw);
322 int offset;
323 unsigned long flags;
324
325 BUG_ON(!domain->pgd);
326
327 addr &= (((u64)1) << addr_width) - 1;
328 parent = domain->pgd;
329
330 spin_lock_irqsave(&domain->mapping_lock, flags);
331 while (level > 0) {
332 void *tmp_page;
333
334 offset = address_level_offset(addr, level);
335 pte = &parent[offset];
336 if (level == 1)
337 break;
338
339 if (!dma_pte_present(*pte)) {
340 tmp_page = alloc_pgtable_page();
341
342 if (!tmp_page) {
343 spin_unlock_irqrestore(&domain->mapping_lock,
344 flags);
345 return NULL;
346 }
347 __iommu_flush_cache(domain->iommu, tmp_page,
348 PAGE_SIZE_4K);
349 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
350 /*
351 * high level table always sets r/w, last level page
352 * table control read/write
353 */
354 dma_set_pte_readable(*pte);
355 dma_set_pte_writable(*pte);
356 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
357 }
358 parent = phys_to_virt(dma_pte_addr(*pte));
359 level--;
360 }
361
362 spin_unlock_irqrestore(&domain->mapping_lock, flags);
363 return pte;
364}
365
366/* return address's pte at specific level */
367static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
368 int level)
369{
370 struct dma_pte *parent, *pte = NULL;
371 int total = agaw_to_level(domain->agaw);
372 int offset;
373
374 parent = domain->pgd;
375 while (level <= total) {
376 offset = address_level_offset(addr, total);
377 pte = &parent[offset];
378 if (level == total)
379 return pte;
380
381 if (!dma_pte_present(*pte))
382 break;
383 parent = phys_to_virt(dma_pte_addr(*pte));
384 total--;
385 }
386 return NULL;
387}
388
389/* clear one page's page table */
390static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
391{
392 struct dma_pte *pte = NULL;
393
394 /* get last level pte */
395 pte = dma_addr_level_pte(domain, addr, 1);
396
397 if (pte) {
398 dma_clear_pte(*pte);
399 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
400 }
401}
402
403/* clear last level pte, a tlb flush should be followed */
404static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
405{
406 int addr_width = agaw_to_width(domain->agaw);
407
408 start &= (((u64)1) << addr_width) - 1;
409 end &= (((u64)1) << addr_width) - 1;
410 /* in case it's partial page */
411 start = PAGE_ALIGN_4K(start);
412 end &= PAGE_MASK_4K;
413
414 /* we don't need lock here, nobody else touches the iova range */
415 while (start < end) {
416 dma_pte_clear_one(domain, start);
417 start += PAGE_SIZE_4K;
418 }
419}
420
421/* free page table pages. last level pte should already be cleared */
422static void dma_pte_free_pagetable(struct dmar_domain *domain,
423 u64 start, u64 end)
424{
425 int addr_width = agaw_to_width(domain->agaw);
426 struct dma_pte *pte;
427 int total = agaw_to_level(domain->agaw);
428 int level;
429 u64 tmp;
430
431 start &= (((u64)1) << addr_width) - 1;
432 end &= (((u64)1) << addr_width) - 1;
433
434 /* we don't need lock here, nobody else touches the iova range */
435 level = 2;
436 while (level <= total) {
437 tmp = align_to_level(start, level);
438 if (tmp >= end || (tmp + level_size(level) > end))
439 return;
440
441 while (tmp < end) {
442 pte = dma_addr_level_pte(domain, tmp, level);
443 if (pte) {
444 free_pgtable_page(
445 phys_to_virt(dma_pte_addr(*pte)));
446 dma_clear_pte(*pte);
447 __iommu_flush_cache(domain->iommu,
448 pte, sizeof(*pte));
449 }
450 tmp += level_size(level);
451 }
452 level++;
453 }
454 /* free pgd */
455 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
456 free_pgtable_page(domain->pgd);
457 domain->pgd = NULL;
458 }
459}
460
461/* iommu handling */
462static int iommu_alloc_root_entry(struct intel_iommu *iommu)
463{
464 struct root_entry *root;
465 unsigned long flags;
466
467 root = (struct root_entry *)alloc_pgtable_page();
468 if (!root)
469 return -ENOMEM;
470
471 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
472
473 spin_lock_irqsave(&iommu->lock, flags);
474 iommu->root_entry = root;
475 spin_unlock_irqrestore(&iommu->lock, flags);
476
477 return 0;
478}
479
480static void iommu_set_root_entry(struct intel_iommu *iommu)
481{
482 void *addr;
483 u32 cmd, sts;
484 unsigned long flag;
485
486 addr = iommu->root_entry;
487
488 spin_lock_irqsave(&iommu->register_lock, flag);
489 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
490
491 cmd = iommu->gcmd | DMA_GCMD_SRTP;
492 writel(cmd, iommu->reg + DMAR_GCMD_REG);
493
494 /* Make sure hardware complete it */
495 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
496 readl, (sts & DMA_GSTS_RTPS), sts);
497
498 spin_unlock_irqrestore(&iommu->register_lock, flag);
499}
500
501static void iommu_flush_write_buffer(struct intel_iommu *iommu)
502{
503 u32 val;
504 unsigned long flag;
505
506 if (!cap_rwbf(iommu->cap))
507 return;
508 val = iommu->gcmd | DMA_GCMD_WBF;
509
510 spin_lock_irqsave(&iommu->register_lock, flag);
511 writel(val, iommu->reg + DMAR_GCMD_REG);
512
513 /* Make sure hardware complete it */
514 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
515 readl, (!(val & DMA_GSTS_WBFS)), val);
516
517 spin_unlock_irqrestore(&iommu->register_lock, flag);
518}
519
520/* return value determines if we need a write buffer flush */
521static int __iommu_flush_context(struct intel_iommu *iommu,
522 u16 did, u16 source_id, u8 function_mask, u64 type,
523 int non_present_entry_flush)
524{
525 u64 val = 0;
526 unsigned long flag;
527
528 /*
 529 * In the non-present entry flush case, if hardware doesn't cache
 530 * non-present entries we do nothing; if it does cache them, we flush
 531 * entries of domain 0 (domain id 0 is used to cache any non-present
 532 * entries)
533 */
534 if (non_present_entry_flush) {
535 if (!cap_caching_mode(iommu->cap))
536 return 1;
537 else
538 did = 0;
539 }
540
541 switch (type) {
542 case DMA_CCMD_GLOBAL_INVL:
543 val = DMA_CCMD_GLOBAL_INVL;
544 break;
545 case DMA_CCMD_DOMAIN_INVL:
546 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
547 break;
548 case DMA_CCMD_DEVICE_INVL:
549 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
550 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
551 break;
552 default:
553 BUG();
554 }
555 val |= DMA_CCMD_ICC;
556
557 spin_lock_irqsave(&iommu->register_lock, flag);
558 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
559
560 /* Make sure hardware complete it */
561 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
562 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
563
564 spin_unlock_irqrestore(&iommu->register_lock, flag);
565
 566 /* flush context entry will implicitly flush write buffer */
567 return 0;
568}
569
570/* return value determines if we need a write buffer flush */
571static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
572 u64 addr, unsigned int size_order, u64 type,
573 int non_present_entry_flush)
574{
575 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
576 u64 val = 0, val_iva = 0;
577 unsigned long flag;
578
579 /*
 580 * In the non-present entry flush case, if hardware doesn't cache
 581 * non-present entries we do nothing; if it does cache them, we flush
 582 * entries of domain 0 (domain id 0 is used to cache any non-present
 583 * entries)
584 */
585 if (non_present_entry_flush) {
586 if (!cap_caching_mode(iommu->cap))
587 return 1;
588 else
589 did = 0;
590 }
591
592 switch (type) {
593 case DMA_TLB_GLOBAL_FLUSH:
594 /* global flush doesn't need set IVA_REG */
595 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
596 break;
597 case DMA_TLB_DSI_FLUSH:
598 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
599 break;
600 case DMA_TLB_PSI_FLUSH:
601 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
602 /* Note: always flush non-leaf currently */
603 val_iva = size_order | addr;
604 break;
605 default:
606 BUG();
607 }
608 /* Note: set drain read/write */
609#if 0
610 /*
611 * This is probably to be super secure.. Looks like we can
612 * ignore it without any impact.
613 */
614 if (cap_read_drain(iommu->cap))
615 val |= DMA_TLB_READ_DRAIN;
616#endif
617 if (cap_write_drain(iommu->cap))
618 val |= DMA_TLB_WRITE_DRAIN;
619
620 spin_lock_irqsave(&iommu->register_lock, flag);
621 /* Note: Only uses first TLB reg currently */
622 if (val_iva)
623 dmar_writeq(iommu->reg + tlb_offset, val_iva);
624 dmar_writeq(iommu->reg + tlb_offset + 8, val);
625
626 /* Make sure hardware complete it */
627 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
628 dmar_readq, (!(val & DMA_TLB_IVT)), val);
629
630 spin_unlock_irqrestore(&iommu->register_lock, flag);
631
632 /* check IOTLB invalidation granularity */
633 if (DMA_TLB_IAIG(val) == 0)
634 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
635 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
636 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
637 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
 638 /* flush iotlb entry will implicitly flush write buffer */
639 return 0;
640}
641
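/*
 * Page-selective IOTLB invalidation for @pages pages starting at @addr;
 * falls back to a domain-selective flush when PSI is not supported or
 * the range exceeds the maximum address mask.
 */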
642static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
643 u64 addr, unsigned int pages, int non_present_entry_flush)
644{
f76aec76 645 unsigned int mask;
646
647 BUG_ON(addr & (~PAGE_MASK_4K));
648 BUG_ON(pages == 0);
649
650 /* Fallback to domain selective flush if no PSI support */
651 if (!cap_pgsel_inv(iommu->cap))
652 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
653 DMA_TLB_DSI_FLUSH,
654 non_present_entry_flush);
655
656 /*
657 * PSI requires page size to be 2 ^ x, and the base address is naturally
658 * aligned to the size
659 */
f76aec76 660 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 661 /* Fallback to domain selective flush if size is too big */
f76aec76 662 if (mask > cap_max_amask_val(iommu->cap))
663 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
664 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
ba395927 665
666 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
667 DMA_TLB_PSI_FLUSH,
668 non_present_entry_flush);
669}
670
f8bab735 671static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
672{
673 u32 pmen;
674 unsigned long flags;
675
676 spin_lock_irqsave(&iommu->register_lock, flags);
677 pmen = readl(iommu->reg + DMAR_PMEN_REG);
678 pmen &= ~DMA_PMEN_EPM;
679 writel(pmen, iommu->reg + DMAR_PMEN_REG);
680
681 /* wait for the protected region status bit to clear */
682 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
683 readl, !(pmen & DMA_PMEN_PRS), pmen);
684
685 spin_unlock_irqrestore(&iommu->register_lock, flags);
686}
687
688static int iommu_enable_translation(struct intel_iommu *iommu)
689{
690 u32 sts;
691 unsigned long flags;
692
693 spin_lock_irqsave(&iommu->register_lock, flags);
694 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
695
696 /* Make sure hardware complete it */
697 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
698 readl, (sts & DMA_GSTS_TES), sts);
699
700 iommu->gcmd |= DMA_GCMD_TE;
701 spin_unlock_irqrestore(&iommu->register_lock, flags);
702 return 0;
703}
704
705static int iommu_disable_translation(struct intel_iommu *iommu)
706{
707 u32 sts;
708 unsigned long flag;
709
710 spin_lock_irqsave(&iommu->register_lock, flag);
711 iommu->gcmd &= ~DMA_GCMD_TE;
712 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
713
714 /* Make sure hardware complete it */
715 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
716 readl, (!(sts & DMA_GSTS_TES)), sts);
717
718 spin_unlock_irqrestore(&iommu->register_lock, flag);
719 return 0;
720}
721
722/* iommu interrupt handling. Most of it is MSI-like. */
723
d94afc6c 724static const char *fault_reason_strings[] =
725{
726 "Software",
727 "Present bit in root entry is clear",
728 "Present bit in context entry is clear",
729 "Invalid context entry",
730 "Access beyond MGAW",
731 "PTE Write access is not set",
732 "PTE Read access is not set",
733 "Next page table ptr is invalid",
734 "Root table address invalid",
735 "Context table ptr is invalid",
736 "non-zero reserved fields in RTP",
737 "non-zero reserved fields in CTP",
738 "non-zero reserved fields in PTE",
3460a6d9 739};
f8bab735 740#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
3460a6d9 741
d94afc6c 742const char *dmar_get_fault_reason(u8 fault_reason)
3460a6d9 743{
d94afc6c 744 if (fault_reason > MAX_FAULT_REASON_IDX)
745 return "Unknown";
746 else
747 return fault_reason_strings[fault_reason];
748}
749
750void dmar_msi_unmask(unsigned int irq)
751{
752 struct intel_iommu *iommu = get_irq_data(irq);
753 unsigned long flag;
754
755 /* unmask it */
756 spin_lock_irqsave(&iommu->register_lock, flag);
757 writel(0, iommu->reg + DMAR_FECTL_REG);
758 /* Read a reg to force flush the post write */
759 readl(iommu->reg + DMAR_FECTL_REG);
760 spin_unlock_irqrestore(&iommu->register_lock, flag);
761}
762
763void dmar_msi_mask(unsigned int irq)
764{
765 unsigned long flag;
766 struct intel_iommu *iommu = get_irq_data(irq);
767
768 /* mask it */
769 spin_lock_irqsave(&iommu->register_lock, flag);
770 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
771 /* Read a reg to force flush the post write */
772 readl(iommu->reg + DMAR_FECTL_REG);
773 spin_unlock_irqrestore(&iommu->register_lock, flag);
774}
775
776void dmar_msi_write(int irq, struct msi_msg *msg)
777{
778 struct intel_iommu *iommu = get_irq_data(irq);
779 unsigned long flag;
780
781 spin_lock_irqsave(&iommu->register_lock, flag);
782 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
783 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
784 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
785 spin_unlock_irqrestore(&iommu->register_lock, flag);
786}
787
788void dmar_msi_read(int irq, struct msi_msg *msg)
789{
790 struct intel_iommu *iommu = get_irq_data(irq);
791 unsigned long flag;
792
793 spin_lock_irqsave(&iommu->register_lock, flag);
794 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
795 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
796 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
797 spin_unlock_irqrestore(&iommu->register_lock, flag);
798}
799
800static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
801 u8 fault_reason, u16 source_id, u64 addr)
802{
d94afc6c 803 const char *reason;
804
805 reason = dmar_get_fault_reason(fault_reason);
806
807 printk(KERN_ERR
808 "DMAR:[%s] Request device [%02x:%02x.%d] "
809 "fault addr %llx \n"
810 "DMAR:[fault reason %02d] %s\n",
811 (type ? "DMA Read" : "DMA Write"),
812 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
813 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
814 return 0;
815}
816
817#define PRIMARY_FAULT_REG_LEN (16)
818static irqreturn_t iommu_page_fault(int irq, void *dev_id)
819{
820 struct intel_iommu *iommu = dev_id;
821 int reg, fault_index;
822 u32 fault_status;
823 unsigned long flag;
824
825 spin_lock_irqsave(&iommu->register_lock, flag);
826 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
827
828 /* TBD: ignore advanced fault log currently */
829 if (!(fault_status & DMA_FSTS_PPF))
830 goto clear_overflow;
831
832 fault_index = dma_fsts_fault_record_index(fault_status);
833 reg = cap_fault_reg_offset(iommu->cap);
834 while (1) {
835 u8 fault_reason;
836 u16 source_id;
837 u64 guest_addr;
838 int type;
839 u32 data;
840
841 /* highest 32 bits */
842 data = readl(iommu->reg + reg +
843 fault_index * PRIMARY_FAULT_REG_LEN + 12);
844 if (!(data & DMA_FRCD_F))
845 break;
846
847 fault_reason = dma_frcd_fault_reason(data);
848 type = dma_frcd_type(data);
849
850 data = readl(iommu->reg + reg +
851 fault_index * PRIMARY_FAULT_REG_LEN + 8);
852 source_id = dma_frcd_source_id(data);
853
854 guest_addr = dmar_readq(iommu->reg + reg +
855 fault_index * PRIMARY_FAULT_REG_LEN);
856 guest_addr = dma_frcd_page_addr(guest_addr);
857 /* clear the fault */
858 writel(DMA_FRCD_F, iommu->reg + reg +
859 fault_index * PRIMARY_FAULT_REG_LEN + 12);
860
861 spin_unlock_irqrestore(&iommu->register_lock, flag);
862
863 iommu_page_fault_do_one(iommu, type, fault_reason,
864 source_id, guest_addr);
865
866 fault_index++;
867 if (fault_index > cap_num_fault_regs(iommu->cap))
868 fault_index = 0;
869 spin_lock_irqsave(&iommu->register_lock, flag);
870 }
871clear_overflow:
872 /* clear primary fault overflow */
873 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
874 if (fault_status & DMA_FSTS_PFO)
875 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
876
877 spin_unlock_irqrestore(&iommu->register_lock, flag);
878 return IRQ_HANDLED;
879}
880
881int dmar_set_interrupt(struct intel_iommu *iommu)
882{
883 int irq, ret;
884
885 irq = create_irq();
886 if (!irq) {
887 printk(KERN_ERR "IOMMU: no free vectors\n");
888 return -EINVAL;
889 }
890
891 set_irq_data(irq, iommu);
892 iommu->irq = irq;
893
894 ret = arch_setup_dmar_msi(irq);
895 if (ret) {
896 set_irq_data(irq, NULL);
897 iommu->irq = 0;
898 destroy_irq(irq);
899 return 0;
900 }
901
902 /* Force fault register is cleared */
903 iommu_page_fault(irq, iommu);
904
905 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
906 if (ret)
907 printk(KERN_ERR "IOMMU: can't request irq\n");
908 return ret;
909}
910
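/* Allocate the per-IOMMU domain-id bitmap and domain pointer array. */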
911static int iommu_init_domains(struct intel_iommu *iommu)
912{
913 unsigned long ndomains;
914 unsigned long nlongs;
915
916 ndomains = cap_ndoms(iommu->cap);
 917 pr_debug("Number of Domains supported <%ld>\n", ndomains);
918 nlongs = BITS_TO_LONGS(ndomains);
919
920 /* TBD: there might be 64K domains,
921 * consider other allocation for future chip
922 */
923 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
924 if (!iommu->domain_ids) {
925 printk(KERN_ERR "Allocating domain id array failed\n");
926 return -ENOMEM;
927 }
928 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
929 GFP_KERNEL);
930 if (!iommu->domains) {
931 printk(KERN_ERR "Allocating domain array failed\n");
932 kfree(iommu->domain_ids);
933 return -ENOMEM;
934 }
935
936 spin_lock_init(&iommu->lock);
937
938 /*
939 * if Caching mode is set, then invalid translations are tagged
940 * with domainid 0. Hence we need to pre-allocate it.
941 */
942 if (cap_caching_mode(iommu->cap))
943 set_bit(0, iommu->domain_ids);
944 return 0;
945}
ba395927 946
947
948static void domain_exit(struct dmar_domain *domain);
949
950void free_dmar_iommu(struct intel_iommu *iommu)
951{
952 struct dmar_domain *domain;
953 int i;
954
955 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
956 for (; i < cap_ndoms(iommu->cap); ) {
957 domain = iommu->domains[i];
958 clear_bit(i, iommu->domain_ids);
959 domain_exit(domain);
960 i = find_next_bit(iommu->domain_ids,
961 cap_ndoms(iommu->cap), i+1);
962 }
963
964 if (iommu->gcmd & DMA_GCMD_TE)
965 iommu_disable_translation(iommu);
966
967 if (iommu->irq) {
968 set_irq_data(iommu->irq, NULL);
969 /* This will mask the irq */
970 free_irq(iommu->irq, iommu);
971 destroy_irq(iommu->irq);
972 }
973
974 kfree(iommu->domains);
975 kfree(iommu->domain_ids);
976
977 /* free context mapping */
978 free_context_table(iommu);
979}
980
981static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
982{
983 unsigned long num;
984 unsigned long ndomains;
985 struct dmar_domain *domain;
986 unsigned long flags;
987
988 domain = alloc_domain_mem();
989 if (!domain)
990 return NULL;
991
992 ndomains = cap_ndoms(iommu->cap);
993
994 spin_lock_irqsave(&iommu->lock, flags);
995 num = find_first_zero_bit(iommu->domain_ids, ndomains);
996 if (num >= ndomains) {
997 spin_unlock_irqrestore(&iommu->lock, flags);
998 free_domain_mem(domain);
999 printk(KERN_ERR "IOMMU: no free domain ids\n");
1000 return NULL;
1001 }
1002
1003 set_bit(num, iommu->domain_ids);
1004 domain->id = num;
1005 domain->iommu = iommu;
1006 iommu->domains[num] = domain;
1007 spin_unlock_irqrestore(&iommu->lock, flags);
1008
1009 return domain;
1010}
1011
1012static void iommu_free_domain(struct dmar_domain *domain)
1013{
1014 unsigned long flags;
1015
1016 spin_lock_irqsave(&domain->iommu->lock, flags);
1017 clear_bit(domain->id, domain->iommu->domain_ids);
1018 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1019}
1020
1021static struct iova_domain reserved_iova_list;
1022static struct lock_class_key reserved_alloc_key;
1023static struct lock_class_key reserved_rbtree_key;
1024
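/*
 * Reserve IOVA ranges that must never be handed out to devices: the
 * IOAPIC MMIO window and every PCI device's MMIO resources, so that
 * DMA cannot target them.
 */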
1025static void dmar_init_reserved_ranges(void)
1026{
1027 struct pci_dev *pdev = NULL;
1028 struct iova *iova;
1029 int i;
1030 u64 addr, size;
1031
f661197e 1032 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1033
1034 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1035 &reserved_alloc_key);
1036 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1037 &reserved_rbtree_key);
1038
1039 /* IOAPIC ranges shouldn't be accessed by DMA */
1040 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1041 IOVA_PFN(IOAPIC_RANGE_END));
1042 if (!iova)
1043 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1044
1045 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1046 for_each_pci_dev(pdev) {
1047 struct resource *r;
1048
1049 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1050 r = &pdev->resource[i];
1051 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1052 continue;
1053 addr = r->start;
1054 addr &= PAGE_MASK_4K;
1055 size = r->end - addr;
1056 size = PAGE_ALIGN_4K(size);
1057 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1058 IOVA_PFN(size + addr) - 1);
1059 if (!iova)
1060 printk(KERN_ERR "Reserve iova failed\n");
1061 }
1062 }
1063
1064}
1065
1066static void domain_reserve_special_ranges(struct dmar_domain *domain)
1067{
1068 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1069}
1070
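/*
 * Round a guest address width up so that (width - 12) is a multiple of
 * the 9-bit level stride, i.e. to the next width the page-table
 * hierarchy can actually express (capped at 64).
 */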
1071static inline int guestwidth_to_adjustwidth(int gaw)
1072{
1073 int agaw;
1074 int r = (gaw - 12) % 9;
1075
1076 if (r == 0)
1077 agaw = gaw;
1078 else
1079 agaw = gaw + 9 - r;
1080 if (agaw > 64)
1081 agaw = 64;
1082 return agaw;
1083}
1084
1085static int domain_init(struct dmar_domain *domain, int guest_width)
1086{
1087 struct intel_iommu *iommu;
1088 int adjust_width, agaw;
1089 unsigned long sagaw;
1090
f661197e 1091 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1092 spin_lock_init(&domain->mapping_lock);
1093
1094 domain_reserve_special_ranges(domain);
1095
1096 /* calculate AGAW */
1097 iommu = domain->iommu;
1098 if (guest_width > cap_mgaw(iommu->cap))
1099 guest_width = cap_mgaw(iommu->cap);
1100 domain->gaw = guest_width;
1101 adjust_width = guestwidth_to_adjustwidth(guest_width);
1102 agaw = width_to_agaw(adjust_width);
1103 sagaw = cap_sagaw(iommu->cap);
1104 if (!test_bit(agaw, &sagaw)) {
1105 /* hardware doesn't support it, choose a bigger one */
1106 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1107 agaw = find_next_bit(&sagaw, 5, agaw);
1108 if (agaw >= 5)
1109 return -ENODEV;
1110 }
1111 domain->agaw = agaw;
1112 INIT_LIST_HEAD(&domain->devices);
1113
1114 /* always allocate the top pgd */
1115 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1116 if (!domain->pgd)
1117 return -ENOMEM;
1118 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1119 return 0;
1120}
1121
1122static void domain_exit(struct dmar_domain *domain)
1123{
1124 u64 end;
1125
 1126 /* Domain 0 is reserved, so don't process it */
1127 if (!domain)
1128 return;
1129
1130 domain_remove_dev_info(domain);
1131 /* destroy iovas */
1132 put_iova_domain(&domain->iovad);
1133 end = DOMAIN_MAX_ADDR(domain->gaw);
1134 end = end & (~PAGE_MASK_4K);
1135
1136 /* clear ptes */
1137 dma_pte_clear_range(domain, 0, end);
1138
1139 /* free page tables */
1140 dma_pte_free_pagetable(domain, 0, end);
1141
1142 iommu_free_domain(domain);
1143 free_domain_mem(domain);
1144}
1145
1146static int domain_context_mapping_one(struct dmar_domain *domain,
1147 u8 bus, u8 devfn)
1148{
1149 struct context_entry *context;
1150 struct intel_iommu *iommu = domain->iommu;
1151 unsigned long flags;
1152
1153 pr_debug("Set context mapping for %02x:%02x.%d\n",
1154 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1155 BUG_ON(!domain->pgd);
1156 context = device_to_context_entry(iommu, bus, devfn);
1157 if (!context)
1158 return -ENOMEM;
1159 spin_lock_irqsave(&iommu->lock, flags);
1160 if (context_present(*context)) {
1161 spin_unlock_irqrestore(&iommu->lock, flags);
1162 return 0;
1163 }
1164
1165 context_set_domain_id(*context, domain->id);
1166 context_set_address_width(*context, domain->agaw);
1167 context_set_address_root(*context, virt_to_phys(domain->pgd));
1168 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1169 context_set_fault_enable(*context);
1170 context_set_present(*context);
1171 __iommu_flush_cache(iommu, context, sizeof(*context));
1172
1173 /* it's a non-present to present mapping */
1174 if (iommu->flush.flush_context(iommu, domain->id,
1175 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1176 DMA_CCMD_DEVICE_INVL, 1))
1177 iommu_flush_write_buffer(iommu);
1178 else
1179 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1180
1181 spin_unlock_irqrestore(&iommu->lock, flags);
1182 return 0;
1183}
1184
1185static int
1186domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1187{
1188 int ret;
1189 struct pci_dev *tmp, *parent;
1190
1191 ret = domain_context_mapping_one(domain, pdev->bus->number,
1192 pdev->devfn);
1193 if (ret)
1194 return ret;
1195
1196 /* dependent device mapping */
1197 tmp = pci_find_upstream_pcie_bridge(pdev);
1198 if (!tmp)
1199 return 0;
1200 /* Secondary interface's bus number and devfn 0 */
1201 parent = pdev->bus->self;
1202 while (parent != tmp) {
1203 ret = domain_context_mapping_one(domain, parent->bus->number,
1204 parent->devfn);
1205 if (ret)
1206 return ret;
1207 parent = parent->bus->self;
1208 }
1209 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1210 return domain_context_mapping_one(domain,
1211 tmp->subordinate->number, 0);
1212 else /* this is a legacy PCI bridge */
1213 return domain_context_mapping_one(domain,
1214 tmp->bus->number, tmp->devfn);
1215}
1216
1217static int domain_context_mapped(struct dmar_domain *domain,
1218 struct pci_dev *pdev)
1219{
1220 int ret;
1221 struct pci_dev *tmp, *parent;
1222
1223 ret = device_context_mapped(domain->iommu,
1224 pdev->bus->number, pdev->devfn);
1225 if (!ret)
1226 return ret;
1227 /* dependent device mapping */
1228 tmp = pci_find_upstream_pcie_bridge(pdev);
1229 if (!tmp)
1230 return ret;
1231 /* Secondary interface's bus number and devfn 0 */
1232 parent = pdev->bus->self;
1233 while (parent != tmp) {
1234 ret = device_context_mapped(domain->iommu, parent->bus->number,
1235 parent->devfn);
1236 if (!ret)
1237 return ret;
1238 parent = parent->bus->self;
1239 }
1240 if (tmp->is_pcie)
1241 return device_context_mapped(domain->iommu,
1242 tmp->subordinate->number, 0);
1243 else
1244 return device_context_mapped(domain->iommu,
1245 tmp->bus->number, tmp->devfn);
1246}
1247
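/*
 * Map the range [iova, iova + size) to the physical range starting at
 * hpa, one 4K page at a time, with the given protection bits.
 */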
1248static int
1249domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1250 u64 hpa, size_t size, int prot)
1251{
1252 u64 start_pfn, end_pfn;
1253 struct dma_pte *pte;
1254 int index;
1255
1256 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1257 return -EINVAL;
1258 iova &= PAGE_MASK_4K;
1259 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1260 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1261 index = 0;
1262 while (start_pfn < end_pfn) {
1263 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1264 if (!pte)
1265 return -ENOMEM;
1266 /* We don't need lock here, nobody else
1267 * touches the iova range
1268 */
1269 BUG_ON(dma_pte_addr(*pte));
1270 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1271 dma_set_pte_prot(*pte, prot);
1272 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1273 start_pfn++;
1274 index++;
1275 }
1276 return 0;
1277}
1278
1279static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1280{
1281 clear_context_table(domain->iommu, bus, devfn);
1282 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1283 DMA_CCMD_GLOBAL_INVL, 0);
1284 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1285 DMA_TLB_GLOBAL_FLUSH, 0);
1286}
1287
1288static void domain_remove_dev_info(struct dmar_domain *domain)
1289{
1290 struct device_domain_info *info;
1291 unsigned long flags;
1292
1293 spin_lock_irqsave(&device_domain_lock, flags);
1294 while (!list_empty(&domain->devices)) {
1295 info = list_entry(domain->devices.next,
1296 struct device_domain_info, link);
1297 list_del(&info->link);
1298 list_del(&info->global);
1299 if (info->dev)
358dd8ac 1300 info->dev->dev.archdata.iommu = NULL;
1301 spin_unlock_irqrestore(&device_domain_lock, flags);
1302
1303 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1304 free_devinfo_mem(info);
1305
1306 spin_lock_irqsave(&device_domain_lock, flags);
1307 }
1308 spin_unlock_irqrestore(&device_domain_lock, flags);
1309}
1310
1311/*
1312 * find_domain
358dd8ac 1313 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1314 */
38717946 1315static struct dmar_domain *
1316find_domain(struct pci_dev *pdev)
1317{
1318 struct device_domain_info *info;
1319
1320 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1321 info = pdev->dev.archdata.iommu;
1322 if (info)
1323 return info->domain;
1324 return NULL;
1325}
1326
 1327/* find or allocate the domain for a device; the result is fully initialized */
1328static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1329{
1330 struct dmar_domain *domain, *found = NULL;
1331 struct intel_iommu *iommu;
1332 struct dmar_drhd_unit *drhd;
1333 struct device_domain_info *info, *tmp;
1334 struct pci_dev *dev_tmp;
1335 unsigned long flags;
1336 int bus = 0, devfn = 0;
1337
1338 domain = find_domain(pdev);
1339 if (domain)
1340 return domain;
1341
1342 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1343 if (dev_tmp) {
1344 if (dev_tmp->is_pcie) {
1345 bus = dev_tmp->subordinate->number;
1346 devfn = 0;
1347 } else {
1348 bus = dev_tmp->bus->number;
1349 devfn = dev_tmp->devfn;
1350 }
1351 spin_lock_irqsave(&device_domain_lock, flags);
1352 list_for_each_entry(info, &device_domain_list, global) {
1353 if (info->bus == bus && info->devfn == devfn) {
1354 found = info->domain;
1355 break;
1356 }
1357 }
1358 spin_unlock_irqrestore(&device_domain_lock, flags);
1359 /* pcie-pci bridge already has a domain, uses it */
1360 if (found) {
1361 domain = found;
1362 goto found_domain;
1363 }
1364 }
1365
1366 /* Allocate new domain for the device */
1367 drhd = dmar_find_matched_drhd_unit(pdev);
1368 if (!drhd) {
1369 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1370 pci_name(pdev));
1371 return NULL;
1372 }
1373 iommu = drhd->iommu;
1374
1375 domain = iommu_alloc_domain(iommu);
1376 if (!domain)
1377 goto error;
1378
1379 if (domain_init(domain, gaw)) {
1380 domain_exit(domain);
1381 goto error;
1382 }
1383
1384 /* register pcie-to-pci device */
1385 if (dev_tmp) {
1386 info = alloc_devinfo_mem();
1387 if (!info) {
1388 domain_exit(domain);
1389 goto error;
1390 }
1391 info->bus = bus;
1392 info->devfn = devfn;
1393 info->dev = NULL;
1394 info->domain = domain;
1395 /* This domain is shared by devices under p2p bridge */
1396 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1397
1398 /* pcie-to-pci bridge already has a domain, uses it */
1399 found = NULL;
1400 spin_lock_irqsave(&device_domain_lock, flags);
1401 list_for_each_entry(tmp, &device_domain_list, global) {
1402 if (tmp->bus == bus && tmp->devfn == devfn) {
1403 found = tmp->domain;
1404 break;
1405 }
1406 }
1407 if (found) {
1408 free_devinfo_mem(info);
1409 domain_exit(domain);
1410 domain = found;
1411 } else {
1412 list_add(&info->link, &domain->devices);
1413 list_add(&info->global, &device_domain_list);
1414 }
1415 spin_unlock_irqrestore(&device_domain_lock, flags);
1416 }
1417
1418found_domain:
1419 info = alloc_devinfo_mem();
1420 if (!info)
1421 goto error;
1422 info->bus = pdev->bus->number;
1423 info->devfn = pdev->devfn;
1424 info->dev = pdev;
1425 info->domain = domain;
1426 spin_lock_irqsave(&device_domain_lock, flags);
1427 /* somebody is fast */
1428 found = find_domain(pdev);
1429 if (found != NULL) {
1430 spin_unlock_irqrestore(&device_domain_lock, flags);
1431 if (found != domain) {
1432 domain_exit(domain);
1433 domain = found;
1434 }
1435 free_devinfo_mem(info);
1436 return domain;
1437 }
1438 list_add(&info->link, &domain->devices);
1439 list_add(&info->global, &device_domain_list);
358dd8ac 1440 pdev->dev.archdata.iommu = info;
1441 spin_unlock_irqrestore(&device_domain_lock, flags);
1442 return domain;
1443error:
1444 /* recheck it here, maybe others set it */
1445 return find_domain(pdev);
1446}
1447
1448static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1449{
1450 struct dmar_domain *domain;
1451 unsigned long size;
1452 u64 base;
1453 int ret;
1454
1455 printk(KERN_INFO
1456 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1457 pci_name(pdev), start, end);
1458 /* page table init */
1459 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1460 if (!domain)
1461 return -ENOMEM;
1462
1463 /* The address might not be aligned */
1464 base = start & PAGE_MASK_4K;
1465 size = end - base;
1466 size = PAGE_ALIGN_4K(size);
1467 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1468 IOVA_PFN(base + size) - 1)) {
1469 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1470 ret = -ENOMEM;
1471 goto error;
1472 }
1473
1474 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1475 size, base, pci_name(pdev));
1476 /*
1477 * RMRR range might have overlap with physical memory range,
1478 * clear it first
1479 */
1480 dma_pte_clear_range(domain, base, base + size);
1481
1482 ret = domain_page_mapping(domain, base, base, size,
1483 DMA_PTE_READ|DMA_PTE_WRITE);
1484 if (ret)
1485 goto error;
1486
1487 /* context entry init */
1488 ret = domain_context_mapping(domain, pdev);
1489 if (!ret)
1490 return 0;
1491error:
1492 domain_exit(domain);
1493 return ret;
1494
1495}
1496
1497static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1498 struct pci_dev *pdev)
1499{
358dd8ac 1500 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1501 return 0;
1502 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1503 rmrr->end_address + 1);
1504}
1505
e820482c 1506#ifdef CONFIG_DMAR_GFX_WA
1507struct iommu_prepare_data {
1508 struct pci_dev *pdev;
1509 int ret;
1510};
1511
1512static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1513 unsigned long end_pfn, void *datax)
1514{
1515 struct iommu_prepare_data *data;
1516
1517 data = (struct iommu_prepare_data *)datax;
1518
1519 data->ret = iommu_prepare_identity_map(data->pdev,
1520 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1521 return data->ret;
1522
1523}
1524
1525static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1526{
1527 int nid;
1528 struct iommu_prepare_data data;
1529
1530 data.pdev = pdev;
1531 data.ret = 0;
1532
1533 for_each_online_node(nid) {
1534 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1535 if (data.ret)
1536 return data.ret;
1537 }
1538 return data.ret;
1539}
1540
1541static void __init iommu_prepare_gfx_mapping(void)
1542{
1543 struct pci_dev *pdev = NULL;
1544 int ret;
1545
1546 for_each_pci_dev(pdev) {
358dd8ac 1547 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1548 !IS_GFX_DEVICE(pdev))
1549 continue;
1550 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1551 pci_name(pdev));
1552 ret = iommu_prepare_with_active_regions(pdev);
1553 if (ret)
1554 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1555 }
1556}
1557#endif
1558
1559#ifdef CONFIG_DMAR_FLOPPY_WA
1560static inline void iommu_prepare_isa(void)
1561{
1562 struct pci_dev *pdev;
1563 int ret;
1564
1565 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1566 if (!pdev)
1567 return;
1568
1569 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1570 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1571
1572 if (ret)
 1573 printk("IOMMU: Failed to create 0-16M identity map, "
1574 "floppy might not work\n");
1575
1576}
1577#else
1578static inline void iommu_prepare_isa(void)
1579{
1580 return;
1581}
 1582#endif /* !CONFIG_DMAR_FLOPPY_WA */
1583
1584int __init init_dmars(void)
1585{
1586 struct dmar_drhd_unit *drhd;
1587 struct dmar_rmrr_unit *rmrr;
1588 struct pci_dev *pdev;
1589 struct intel_iommu *iommu;
80b20dd8 1590 int i, ret, unit = 0;
1591
1592 /*
1593 * for each drhd
1594 * allocate root
1595 * initialize and program root entry to not present
1596 * endfor
1597 */
1598 for_each_drhd_unit(drhd) {
5e0d2a6f 1599 g_num_of_iommus++;
1600 /*
1601 * lock not needed as this is only incremented in the single
1602 * threaded kernel __init code path all other access are read
1603 * only
1604 */
1605 }
1606
80b20dd8 1607 deferred_flush = kzalloc(g_num_of_iommus *
1608 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1609 if (!deferred_flush) {
5e0d2a6f 1610 ret = -ENOMEM;
1611 goto error;
1612 }
1613
5e0d2a6f 1614 for_each_drhd_unit(drhd) {
1615 if (drhd->ignored)
1616 continue;
1617
1618 iommu = drhd->iommu;
ba395927 1619
1620 ret = iommu_init_domains(iommu);
1621 if (ret)
1622 goto error;
1623
1624 /*
1625 * TBD:
1626 * we could share the same root & context tables
 1627 * among all IOMMUs. Need to split it later.
1628 */
1629 ret = iommu_alloc_root_entry(iommu);
1630 if (ret) {
1631 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1632 goto error;
1633 }
1634 }
1635
1636 for_each_drhd_unit(drhd) {
1637 if (drhd->ignored)
1638 continue;
1639
1640 iommu = drhd->iommu;
1641 if (dmar_enable_qi(iommu)) {
1642 /*
1643 * Queued Invalidate not enabled, use Register Based
1644 * Invalidate
1645 */
1646 iommu->flush.flush_context = __iommu_flush_context;
1647 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1648 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1649 "invalidation\n", drhd->reg_base_addr);
1650 } else {
1651 iommu->flush.flush_context = qi_flush_context;
1652 iommu->flush.flush_iotlb = qi_flush_iotlb;
1653 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1654 "invalidation\n", drhd->reg_base_addr);
1655 }
1656 }
1657
1658 /*
1659 * For each rmrr
1660 * for each dev attached to rmrr
1661 * do
1662 * locate drhd for dev, alloc domain for dev
1663 * allocate free domain
1664 * allocate page table entries for rmrr
1665 * if context not allocated for bus
1666 * allocate and init context
1667 * set present in root table for this bus
1668 * init context with domain, translation etc
1669 * endfor
1670 * endfor
1671 */
1672 for_each_rmrr_units(rmrr) {
1673 for (i = 0; i < rmrr->devices_cnt; i++) {
1674 pdev = rmrr->devices[i];
 1675 /* some BIOSes list nonexistent devices in the DMAR table */
1676 if (!pdev)
1677 continue;
1678 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1679 if (ret)
1680 printk(KERN_ERR
1681 "IOMMU: mapping reserved region failed\n");
1682 }
1683 }
1684
1685 iommu_prepare_gfx_mapping();
1686
1687 iommu_prepare_isa();
1688
1689 /*
1690 * for each drhd
1691 * enable fault log
1692 * global invalidate context cache
1693 * global invalidate iotlb
1694 * enable translation
1695 */
1696 for_each_drhd_unit(drhd) {
1697 if (drhd->ignored)
1698 continue;
1699 iommu = drhd->iommu;
1700 sprintf (iommu->name, "dmar%d", unit++);
1701
1702 iommu_flush_write_buffer(iommu);
1703
1704 ret = dmar_set_interrupt(iommu);
1705 if (ret)
1706 goto error;
1707
1708 iommu_set_root_entry(iommu);
1709
1710 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1711 0);
1712 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1713 0);
f8bab735 1714 iommu_disable_protect_mem_regions(iommu);
1715
1716 ret = iommu_enable_translation(iommu);
1717 if (ret)
1718 goto error;
1719 }
1720
1721 return 0;
1722error:
1723 for_each_drhd_unit(drhd) {
1724 if (drhd->ignored)
1725 continue;
1726 iommu = drhd->iommu;
1727 free_iommu(iommu);
1728 }
1729 return ret;
1730}
1731
1732static inline u64 aligned_size(u64 host_addr, size_t size)
1733{
1734 u64 addr;
1735 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1736 return PAGE_ALIGN_4K(addr);
1737}
1738
1739struct iova *
f76aec76 1740iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 1741{
1742 struct iova *piova;
1743
1744 /* Make sure it's in range */
ba395927 1745 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 1746 if (!size || (IOVA_START_ADDR + size > end))
1747 return NULL;
1748
1749 piova = alloc_iova(&domain->iovad,
f76aec76 1750 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1751 return piova;
1752}
1753
1754static struct iova *
1755__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1756 size_t size)
ba395927 1757{
ba395927 1758 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 1759 struct iova *iova = NULL;
ba395927 1760
7d3b03ce 1761 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
f76aec76 1762 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1763 } else {
1764 /*
1765 * First try to allocate an io virtual address in
1766 * DMA_32BIT_MASK and if that fails then try allocating
3609801e 1767 * from higher range
ba395927 1768 */
f76aec76 1769 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 1770 if (!iova)
f76aec76 1771 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1772 }
1773
1774 if (!iova) {
1775 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1776 return NULL;
1777 }
1778
1779 return iova;
1780}
1781
1782static struct dmar_domain *
1783get_valid_domain_for_dev(struct pci_dev *pdev)
1784{
1785 struct dmar_domain *domain;
1786 int ret;
1787
1788 domain = get_domain_for_dev(pdev,
1789 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1790 if (!domain) {
1791 printk(KERN_ERR
1792 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 1793 return NULL;
1794 }
1795
1796 /* make sure context mapping is ok */
1797 if (unlikely(!domain_context_mapped(domain, pdev))) {
1798 ret = domain_context_mapping(domain, pdev);
1799 if (ret) {
1800 printk(KERN_ERR
1801 "Domain context map for %s failed",
1802 pci_name(pdev));
4fe05bbc 1803 return NULL;
f76aec76 1804 }
1805 }
1806
1807 return domain;
1808}
1809
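/*
 * Map one buffer for DMA: look up (or create) the device's domain,
 * allocate an IOVA, fill in the page-table entries and flush the
 * IOTLB for the new mapping.
 */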
1810static dma_addr_t
1811intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
1812{
1813 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 1814 struct dmar_domain *domain;
6865f0d1 1815 unsigned long start_paddr;
1816 struct iova *iova;
1817 int prot = 0;
6865f0d1 1818 int ret;
1819
1820 BUG_ON(dir == DMA_NONE);
358dd8ac 1821 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
6865f0d1 1822 return paddr;
1823
1824 domain = get_valid_domain_for_dev(pdev);
1825 if (!domain)
1826 return 0;
1827
6865f0d1 1828 size = aligned_size((u64)paddr, size);
1829
1830 iova = __intel_alloc_iova(hwdev, domain, size);
1831 if (!iova)
1832 goto error;
1833
6865f0d1 1834 start_paddr = iova->pfn_lo << PAGE_SHIFT_4K;
f76aec76 1835
1836 /*
1837 * Check if DMAR supports zero-length reads on write only
1838 * mappings..
1839 */
1840 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1841 !cap_zlr(domain->iommu->cap))
1842 prot |= DMA_PTE_READ;
1843 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1844 prot |= DMA_PTE_WRITE;
1845 /*
6865f0d1 1846 * paddr to (paddr + size) might span a partial page, so we map the whole
ba395927 1847 * page. Note: if two parts of one page are mapped separately, we
6865f0d1 1848 * might have two guest addresses mapping to the same host paddr, but this
1849 * is not a big problem
1850 */
1851 ret = domain_page_mapping(domain, start_paddr,
1852 ((u64)paddr) & PAGE_MASK_4K, size, prot);
1853 if (ret)
1854 goto error;
1855
1856 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1857 pci_name(pdev), size, (u64)paddr,
1858 size, (u64)start_paddr, dir);
1859
1860 /* it's a non-present to present mapping */
1861 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
6865f0d1 1862 start_paddr, size >> PAGE_SHIFT_4K, 1);
1863 if (ret)
1864 iommu_flush_write_buffer(domain->iommu);
1865
6865f0d1 1866 return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));
ba395927 1867
ba395927 1868error:
1869 if (iova)
1870 __free_iova(&domain->iovad, iova);
ba395927 1871 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
6865f0d1 1872 pci_name(pdev), size, (u64)paddr, dir);
1873 return 0;
1874}
1875
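/*
 * Deferred unmap handling: freed IOVAs are queued per IOMMU and
 * released in batches after a single global IOTLB flush, triggered by
 * the unmap timer or when HIGH_WATER_MARK entries have accumulated.
 */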
5e0d2a6f 1876static void flush_unmaps(void)
1877{
80b20dd8 1878 int i, j;
5e0d2a6f 1879
5e0d2a6f 1880 timer_on = 0;
1881
1882 /* just flush them all */
1883 for (i = 0; i < g_num_of_iommus; i++) {
80b20dd8 1884 if (deferred_flush[i].next) {
1885 struct intel_iommu *iommu =
1886 deferred_flush[i].domain[0]->iommu;
1887
1888 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1889 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 1890 for (j = 0; j < deferred_flush[i].next; j++) {
1891 __free_iova(&deferred_flush[i].domain[j]->iovad,
1892 deferred_flush[i].iova[j]);
1893 }
1894 deferred_flush[i].next = 0;
1895 }
5e0d2a6f 1896 }
1897
5e0d2a6f 1898 list_size = 0;
5e0d2a6f 1899}
1900
1901static void flush_unmaps_timeout(unsigned long data)
1902{
80b20dd8 1903 unsigned long flags;
1904
1905 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 1906 flush_unmaps();
80b20dd8 1907 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 1908}
1909
1910static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1911{
1912 unsigned long flags;
80b20dd8 1913 int next, iommu_id;
5e0d2a6f 1914
1915 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 1916 if (list_size == HIGH_WATER_MARK)
1917 flush_unmaps();
1918
1919 iommu_id = dom->iommu->seq_id;
1920
80b20dd8 1921 next = deferred_flush[iommu_id].next;
1922 deferred_flush[iommu_id].domain[next] = dom;
1923 deferred_flush[iommu_id].iova[next] = iova;
1924 deferred_flush[iommu_id].next++;
5e0d2a6f 1925
1926 if (!timer_on) {
1927 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
1928 timer_on = 1;
1929 }
1930 list_size++;
1931 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1932}
1933
f76aec76 1934static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1935 size_t size, int dir)
1936{
ba395927 1937 struct pci_dev *pdev = to_pci_dev(dev);
1938 struct dmar_domain *domain;
1939 unsigned long start_addr;
1940 struct iova *iova;
1941
358dd8ac 1942 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 1943 return;
1944 domain = find_domain(pdev);
1945 BUG_ON(!domain);
1946
1947 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 1948 if (!iova)
ba395927 1949 return;
ba395927 1950
1951 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1952 size = aligned_size((u64)dev_addr, size);
ba395927 1953
1954 pr_debug("Device %s unmapping: %lx@%llx\n",
1955 pci_name(pdev), size, (u64)start_addr);
ba395927 1956
1957 /* clear the whole page */
1958 dma_pte_clear_range(domain, start_addr, start_addr + size);
1959 /* free page tables */
1960 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 1961 if (intel_iommu_strict) {
1962 if (iommu_flush_iotlb_psi(domain->iommu,
1963 domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
1964 iommu_flush_write_buffer(domain->iommu);
1965 /* free iova */
1966 __free_iova(&domain->iovad, iova);
1967 } else {
1968 add_unmap(domain, iova);
1969 /*
 1970 * queue up the release of the unmap to save the roughly 1/6th of
 1971 * the CPU time otherwise spent in the iotlb flush operation...
1972 */
5e0d2a6f 1973 }
1974}
1975
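/*
 * Coherent allocations are plain zeroed pages mapped DMA_BIDIRECTIONAL
 * through intel_map_single().  GFP_DMA/GFP_DMA32 are stripped because
 * the IOMMU can remap any physical page into the device's DMA window,
 * so low memory is not required.
 */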
1976static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1977 dma_addr_t *dma_handle, gfp_t flags)
1978{
1979 void *vaddr;
1980 int order;
1981
1982 size = PAGE_ALIGN_4K(size);
1983 order = get_order(size);
1984 flags &= ~(GFP_DMA | GFP_DMA32);
1985
1986 vaddr = (void *)__get_free_pages(flags, order);
1987 if (!vaddr)
1988 return NULL;
1989 memset(vaddr, 0, size);
1990
6865f0d1 1991 *dma_handle = intel_map_single(hwdev, virt_to_bus(vaddr), size, DMA_BIDIRECTIONAL);
1992 if (*dma_handle)
1993 return vaddr;
1994 free_pages((unsigned long)vaddr, order);
1995 return NULL;
1996}
1997
1998static void intel_free_coherent(struct device *hwdev, size_t size,
1999 void *vaddr, dma_addr_t dma_handle)
2000{
2001 int order;
2002
2003 size = PAGE_ALIGN_4K(size);
2004 order = get_order(size);
2005
2006 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2007 free_pages((unsigned long)vaddr, order);
2008}
2009
12d4d40e 2010#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
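/*
 * intel_unmap_sg() recomputes the total aligned length of the list,
 * clears the PTEs and page tables for the whole range, flushes the
 * IOTLB, and frees the single IOVA allocation made by intel_map_sg().
 */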
c03ab37c 2011static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2012 int nelems, int dir)
2013{
2014 int i;
2015 struct pci_dev *pdev = to_pci_dev(hwdev);
2016 struct dmar_domain *domain;
2017 unsigned long start_addr;
2018 struct iova *iova;
2019 size_t size = 0;
2020 void *addr;
c03ab37c 2021 struct scatterlist *sg;
ba395927 2022
358dd8ac 2023 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2024 return;
2025
2026 domain = find_domain(pdev);
ba395927 2027
c03ab37c 2028 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2029 if (!iova)
2030 return;
c03ab37c 2031 for_each_sg(sglist, sg, nelems, i) {
2032 addr = SG_ENT_VIRT_ADDRESS(sg);
2033 size += aligned_size((u64)addr, sg->length);
2034 }
2035
2036 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2037
2038 /* clear the whole page */
2039 dma_pte_clear_range(domain, start_addr, start_addr + size);
2040 /* free page tables */
2041 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2042
2043 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2044 size >> PAGE_SHIFT_4K, 0))
ba395927 2045 iommu_flush_write_buffer(domain->iommu);
2046
2047 /* free iova */
2048 __free_iova(&domain->iovad, iova);
2049}
2050
ba395927 2051static int intel_nontranslate_map_sg(struct device *hwdev,
c03ab37c 2052 struct scatterlist *sglist, int nelems, int dir)
2053{
2054 int i;
c03ab37c 2055 struct scatterlist *sg;
ba395927 2056
c03ab37c 2057 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2058 BUG_ON(!sg_page(sg));
2059 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2060 sg->dma_length = sg->length;
2061 }
2062 return nelems;
2063}
2064
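/*
 * intel_map_sg() works in two passes: the first pass sums the aligned
 * length of every element so a single IOVA range can be allocated; the
 * second pass maps each element back-to-back inside that range and
 * stores the resulting bus address in sg->dma_address.  On any mapping
 * failure the partial range is torn down and 0 is returned.
 */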
2065static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2066 int nelems, int dir)
2067{
2068 void *addr;
2069 int i;
2070 struct pci_dev *pdev = to_pci_dev(hwdev);
2071 struct dmar_domain *domain;
2072 size_t size = 0;
2073 int prot = 0;
2074 size_t offset = 0;
2075 struct iova *iova = NULL;
2076 int ret;
c03ab37c 2077 struct scatterlist *sg;
f76aec76 2078 unsigned long start_addr;
2079
2080 BUG_ON(dir == DMA_NONE);
358dd8ac 2081 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2082 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2083
2084 domain = get_valid_domain_for_dev(pdev);
2085 if (!domain)
2086 return 0;
2087
c03ab37c 2088 for_each_sg(sglist, sg, nelems, i) {
ba395927 2089 addr = SG_ENT_VIRT_ADDRESS(sg);
2090 addr = (void *)virt_to_phys(addr);
2091 size += aligned_size((u64)addr, sg->length);
2092 }
2093
2094 iova = __intel_alloc_iova(hwdev, domain, size);
2095 if (!iova) {
c03ab37c 2096 sglist->dma_length = 0;
2097 return 0;
2098 }
2099
2100 /*
2101 * Check if DMAR supports zero-length reads on write only
2102 * mappings..
2103 */
 2104 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2105 !cap_zlr(domain->iommu->cap))
2106 prot |= DMA_PTE_READ;
2107 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2108 prot |= DMA_PTE_WRITE;
2109
2110 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2111 offset = 0;
c03ab37c 2112 for_each_sg(sglist, sg, nelems, i) {
2113 addr = SG_ENT_VIRT_ADDRESS(sg);
2114 addr = (void *)virt_to_phys(addr);
2115 size = aligned_size((u64)addr, sg->length);
2116 ret = domain_page_mapping(domain, start_addr + offset,
2117 ((u64)addr) & PAGE_MASK_4K,
2118 size, prot);
2119 if (ret) {
2120 /* clear the page */
2121 dma_pte_clear_range(domain, start_addr,
2122 start_addr + offset);
2123 /* free page tables */
2124 dma_pte_free_pagetable(domain, start_addr,
2125 start_addr + offset);
2126 /* free iova */
2127 __free_iova(&domain->iovad, iova);
2128 return 0;
2129 }
2130 sg->dma_address = start_addr + offset +
2131 ((u64)addr & (~PAGE_MASK_4K));
ba395927 2132 sg->dma_length = sg->length;
f76aec76 2133 offset += size;
2134 }
2135
ba395927 2136 /* it's a non-present to present mapping */
2137 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2138 start_addr, offset >> PAGE_SHIFT_4K, 1))
2139 iommu_flush_write_buffer(domain->iommu);
2140 return nelems;
2141}
2142
2143static struct dma_mapping_ops intel_dma_ops = {
2144 .alloc_coherent = intel_alloc_coherent,
2145 .free_coherent = intel_free_coherent,
2146 .map_single = intel_map_single,
2147 .unmap_single = intel_unmap_single,
2148 .map_sg = intel_map_sg,
2149 .unmap_sg = intel_unmap_sg,
2150};
2151
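/*
 * Illustrative sketch: once intel_dma_ops is installed as the global
 * dma_ops, an ordinary driver's streaming-DMA calls are dispatched into
 * the intel_* handlers above.  The helper name and buffer below are
 * hypothetical, for illustration only.
 */
#if 0	/* example only, never compiled */
static int example_rx_one_buffer(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* routed through dma_ops->map_single, i.e. intel_map_single() */
	handle = pci_map_single(pdev, buf, len, PCI_DMA_FROMDEVICE);
	if (!handle)	/* intel_map_single() returns 0 on failure */
		return -ENOMEM;

	/* ... let the device DMA into buf via the returned bus address ... */

	/* routed through dma_ops->unmap_single, i.e. intel_unmap_single() */
	pci_unmap_single(pdev, handle, len, PCI_DMA_FROMDEVICE);
	return 0;
}
#endif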
2152static inline int iommu_domain_cache_init(void)
2153{
2154 int ret = 0;
2155
2156 iommu_domain_cache = kmem_cache_create("iommu_domain",
2157 sizeof(struct dmar_domain),
2158 0,
2159 SLAB_HWCACHE_ALIGN,
2160
2161 NULL);
2162 if (!iommu_domain_cache) {
2163 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2164 ret = -ENOMEM;
2165 }
2166
2167 return ret;
2168}
2169
2170static inline int iommu_devinfo_cache_init(void)
2171{
2172 int ret = 0;
2173
2174 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2175 sizeof(struct device_domain_info),
2176 0,
2177 SLAB_HWCACHE_ALIGN,
2178
2179 NULL);
2180 if (!iommu_devinfo_cache) {
2181 printk(KERN_ERR "Couldn't create devinfo cache\n");
2182 ret = -ENOMEM;
2183 }
2184
2185 return ret;
2186}
2187
2188static inline int iommu_iova_cache_init(void)
2189{
2190 int ret = 0;
2191
2192 iommu_iova_cache = kmem_cache_create("iommu_iova",
2193 sizeof(struct iova),
2194 0,
2195 SLAB_HWCACHE_ALIGN,
2196
2197 NULL);
2198 if (!iommu_iova_cache) {
2199 printk(KERN_ERR "Couldn't create iova cache\n");
2200 ret = -ENOMEM;
2201 }
2202
2203 return ret;
2204}
2205
2206static int __init iommu_init_mempool(void)
2207{
2208 int ret;
2209 ret = iommu_iova_cache_init();
2210 if (ret)
2211 return ret;
2212
2213 ret = iommu_domain_cache_init();
2214 if (ret)
2215 goto domain_error;
2216
2217 ret = iommu_devinfo_cache_init();
2218 if (!ret)
2219 return ret;
2220
2221 kmem_cache_destroy(iommu_domain_cache);
2222domain_error:
2223 kmem_cache_destroy(iommu_iova_cache);
2224
2225 return -ENOMEM;
2226}
2227
2228static void __init iommu_exit_mempool(void)
2229{
2230 kmem_cache_destroy(iommu_devinfo_cache);
2231 kmem_cache_destroy(iommu_domain_cache);
2232 kmem_cache_destroy(iommu_iova_cache);
2233
2234}
2235
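/*
 * init_no_remapping_devices() makes two passes over the DRHD units:
 * first it marks units that list no PCI devices as ignored; then, if
 * dmar_map_gfx is clear, it also ignores units that cover only graphics
 * devices and tags those devices with DUMMY_DEVICE_DOMAIN_INFO so the
 * DMA ops pass them through untranslated.
 */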
2236static void __init init_no_remapping_devices(void)
2237{
2238 struct dmar_drhd_unit *drhd;
2239
2240 for_each_drhd_unit(drhd) {
2241 if (!drhd->include_all) {
2242 int i;
2243 for (i = 0; i < drhd->devices_cnt; i++)
2244 if (drhd->devices[i] != NULL)
2245 break;
2246 /* ignore DMAR unit if no pci devices exist */
2247 if (i == drhd->devices_cnt)
2248 drhd->ignored = 1;
2249 }
2250 }
2251
2252 if (dmar_map_gfx)
2253 return;
2254
2255 for_each_drhd_unit(drhd) {
2256 int i;
2257 if (drhd->ignored || drhd->include_all)
2258 continue;
2259
2260 for (i = 0; i < drhd->devices_cnt; i++)
2261 if (drhd->devices[i] &&
2262 !IS_GFX_DEVICE(drhd->devices[i]))
2263 break;
2264
2265 if (i < drhd->devices_cnt)
2266 continue;
2267
2268 /* bypass IOMMU if it is just for gfx devices */
2269 drhd->ignored = 1;
2270 for (i = 0; i < drhd->devices_cnt; i++) {
2271 if (!drhd->devices[i])
2272 continue;
358dd8ac 2273 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2274 }
2275 }
2276}
2277
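/*
 * intel_iommu_init() parses the DMAR table and device scopes, bails out
 * when remapping is not wanted (no_iommu, swiotlb or dmar_disabled),
 * sets up the mempools and reserved IOVA ranges, initializes the DMAR
 * units via init_dmars(), and finally installs intel_dma_ops as the
 * system dma_ops.
 */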
2278int __init intel_iommu_init(void)
2279{
2280 int ret = 0;
2281
2282 if (dmar_table_init())
2283 return -ENODEV;
2284
2285 if (dmar_dev_scope_init())
2286 return -ENODEV;
2287
2288 /*
 2289 * Check the need for DMA-remapping initialization now; the table and
 2290 * device-scope initialization above is also used by interrupt-remapping.
2291 */
2292 if (no_iommu || swiotlb || dmar_disabled)
2293 return -ENODEV;
2294
2295 iommu_init_mempool();
2296 dmar_init_reserved_ranges();
2297
2298 init_no_remapping_devices();
2299
2300 ret = init_dmars();
2301 if (ret) {
2302 printk(KERN_ERR "IOMMU: dmar init failed\n");
2303 put_iova_domain(&reserved_iova_list);
2304 iommu_exit_mempool();
2305 return ret;
2306 }
2307 printk(KERN_INFO
2308 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2309
5e0d2a6f 2310 init_timer(&unmap_timer);
2311 force_iommu = 1;
2312 dma_ops = &intel_dma_ops;
2313 return 0;
2314}
e820482c 2315
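/*
 * The exported intel_iommu_* helpers below expose a minimal domain API
 * (allocate/tear down a domain, set up context and page mappings,
 * detach a device, look up a domain or translation) for other kernel
 * users; see the illustrative sketch at the end of this file.
 */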
2316void intel_iommu_domain_exit(struct dmar_domain *domain)
2317{
2318 u64 end;
2319
 2320 /* Domain 0 is reserved, so don't process it */
2321 if (!domain)
2322 return;
2323
2324 end = DOMAIN_MAX_ADDR(domain->gaw);
2325 end = end & (~PAGE_MASK_4K);
2326
2327 /* clear ptes */
2328 dma_pte_clear_range(domain, 0, end);
2329
2330 /* free page tables */
2331 dma_pte_free_pagetable(domain, 0, end);
2332
2333 iommu_free_domain(domain);
2334 free_domain_mem(domain);
2335}
2336EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2337
2338struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2339{
2340 struct dmar_drhd_unit *drhd;
2341 struct dmar_domain *domain;
2342 struct intel_iommu *iommu;
2343
2344 drhd = dmar_find_matched_drhd_unit(pdev);
2345 if (!drhd) {
2346 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2347 return NULL;
2348 }
2349
2350 iommu = drhd->iommu;
2351 if (!iommu) {
2352 printk(KERN_ERR
2353 "intel_iommu_domain_alloc: iommu == NULL\n");
2354 return NULL;
2355 }
2356 domain = iommu_alloc_domain(iommu);
2357 if (!domain) {
2358 printk(KERN_ERR
2359 "intel_iommu_domain_alloc: domain == NULL\n");
2360 return NULL;
2361 }
2362 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2363 printk(KERN_ERR
2364 "intel_iommu_domain_alloc: domain_init() failed\n");
2365 intel_iommu_domain_exit(domain);
2366 return NULL;
2367 }
2368 return domain;
2369}
2370EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2371
2372int intel_iommu_context_mapping(
2373 struct dmar_domain *domain, struct pci_dev *pdev)
2374{
2375 int rc;
2376 rc = domain_context_mapping(domain, pdev);
2377 return rc;
2378}
2379EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2380
2381int intel_iommu_page_mapping(
2382 struct dmar_domain *domain, dma_addr_t iova,
2383 u64 hpa, size_t size, int prot)
2384{
2385 int rc;
2386 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2387 return rc;
2388}
2389EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2390
2391void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2392{
2393 detach_domain_for_dev(domain, bus, devfn);
2394}
2395EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2396
2397struct dmar_domain *
2398intel_iommu_find_domain(struct pci_dev *pdev)
2399{
2400 return find_domain(pdev);
2401}
2402EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2403
2404int intel_iommu_found(void)
2405{
2406 return g_num_of_iommus;
2407}
2408EXPORT_SYMBOL_GPL(intel_iommu_found);
2409
2410u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2411{
2412 struct dma_pte *pte;
2413 u64 pfn;
2414
2415 pfn = 0;
2416 pte = addr_to_dma_pte(domain, iova);
2417
2418 if (pte)
2419 pfn = dma_pte_addr(*pte);
2420
2421 return pfn >> PAGE_SHIFT_4K;
2422}
2423EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
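/*
 * Illustrative sketch of how an external user of the exported
 * intel_iommu_* helpers above (for example, code assigning a PCI device
 * to a guest) might wire a device into its own remapping domain.  The
 * function name and the gpa/hpa/size values are hypothetical.
 */
#if 0	/* example only, never compiled */
static int example_assign_device(struct pci_dev *pdev, u64 gpa, u64 hpa,
				 size_t size)
{
	struct dmar_domain *domain;
	int ret;

	domain = intel_iommu_domain_alloc(pdev);
	if (!domain)
		return -ENOMEM;

	/* install a guest-physical -> host-physical translation */
	ret = intel_iommu_page_mapping(domain, gpa, hpa, size,
				       DMA_PTE_READ | DMA_PTE_WRITE);
	if (ret)
		goto out;

	/* point the device's context entry at this domain */
	ret = intel_iommu_context_mapping(domain, pdev);
	if (ret)
		goto out;
	return 0;

out:
	intel_iommu_domain_exit(domain);
	return ret;
}
#endif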