/*
 * VFIO: IOMMU DMA mapping support for Type1 IOMMU
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 *
 * We arbitrarily define a Type1 IOMMU as one matching the below code.
 * It could be called the x86 IOMMU as it's designed for AMD-Vi & Intel
 * VT-d, but that makes it harder to re-use as theoretically anyone
 * implementing a similar IOMMU could make use of this. We expect the
 * IOMMU to support the IOMMU API and have few to no restrictions around
 * the IOVA range that can be mapped. The Type1 IOMMU is currently
 * optimized for relatively static mappings of a userspace process with
 * userspace pages pinned into memory. We also assume devices and IOMMU
 * domains are PCI based as the IOMMU API is still centered around a
 * device/bus interface rather than a group interface.
 */

#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/pci.h>		/* pci_bus_type */
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/workqueue.h>

#define DRIVER_VERSION	"0.2"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"Type1 IOMMU driver for VFIO"

static bool allow_unsafe_interrupts;
module_param_named(allow_unsafe_interrupts,
		   allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(allow_unsafe_interrupts,
		 "Enable VFIO IOMMU support on platforms without interrupt remapping support.");

struct vfio_iommu {
	struct iommu_domain	*domain;
	struct mutex		lock;
	struct list_head	dma_list;
	struct list_head	group_list;
	bool			cache;
};

struct vfio_dma {
	struct list_head	next;
	dma_addr_t		iova;		/* Device address */
	unsigned long		vaddr;		/* Process virtual addr */
	long			npage;		/* Number of pages */
	int			prot;		/* IOMMU_READ/WRITE */
};

struct vfio_group {
	struct iommu_group	*iommu_group;
	struct list_head	next;
};

/*
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

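/*
 * Illustrative only (not part of this driver): a userspace consumer of
 * this interface would typically fill a struct vfio_iommu_type1_dma_map
 * from <linux/vfio.h> and issue the ioctls handled at the bottom of this
 * file, roughly:
 *
 *	struct vfio_iommu_type1_dma_map map = {
 *		.argsz = sizeof(map),
 *		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *		.vaddr = (__u64)(uintptr_t)buf,	// page-aligned user buffer
 *		.iova  = 0,			// device address to map at
 *		.size  = buf_size,		// multiple of PAGE_SIZE
 *	};
 *	ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
 *
 * buf, buf_size and container_fd above are hypothetical names; the
 * matching VFIO_IOMMU_UNMAP_DMA takes a vfio_iommu_type1_dma_unmap with
 * iova/size describing the range to tear down.
 */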
#define NPAGE_TO_SIZE(npage)	((size_t)(npage) << PAGE_SHIFT)

struct vwork {
	struct mm_struct	*mm;
	long			npage;
	struct work_struct	work;
};

/* delayed decrement/increment for locked_vm */
static void vfio_lock_acct_bg(struct work_struct *work)
{
	struct vwork *vwork = container_of(work, struct vwork, work);
	struct mm_struct *mm;

	mm = vwork->mm;
	down_write(&mm->mmap_sem);
	mm->locked_vm += vwork->npage;
	up_write(&mm->mmap_sem);
	mmput(mm);
	kfree(vwork);
}

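/*
 * Account @npage pages (positive or negative) against
 * current->mm->locked_vm.  If mmap_sem can't be taken without blocking,
 * defer the update to the workqueue handler above rather than sleep here.
 */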
static void vfio_lock_acct(long npage)
{
	struct vwork *vwork;
	struct mm_struct *mm;

	if (!current->mm)
		return; /* process exited */

	if (down_write_trylock(&current->mm->mmap_sem)) {
		current->mm->locked_vm += npage;
		up_write(&current->mm->mmap_sem);
		return;
	}

	/*
	 * Couldn't get mmap_sem lock, so must setup to update
	 * mm->locked_vm later. If locked_vm were atomic, we
	 * wouldn't need this silliness
	 */
	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
	if (!vwork)
		return;
	mm = get_task_mm(current);
	if (!mm) {
		kfree(vwork);
		return;
	}
	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
	vwork->mm = mm;
	vwork->npage = npage;
	schedule_work(&vwork->work);
}

/*
 * Some mappings aren't backed by a struct page, for example an mmap'd
 * MMIO range for our own or another device. These use a different
 * pfn conversion and shouldn't be tracked as locked pages.
 */
static bool is_invalid_reserved_pfn(unsigned long pfn)
{
	if (pfn_valid(pfn)) {
		bool reserved;
		struct page *tail = pfn_to_page(pfn);
		struct page *head = compound_head(tail);
		reserved = !!(PageReserved(head));
		if (head != tail) {
			/*
			 * "head" is not a dangling pointer
			 * (compound_head takes care of that)
			 * but the hugepage may have been split
			 * from under us (and we may not hold a
			 * reference count on the head page so it can
			 * be reused before we run PageReserved), so
			 * we have to check PageTail before returning
			 * what we just read.
			 */
			smp_rmb();
			if (PageTail(tail))
				return reserved;
		}
		return PageReserved(tail);
	}

	return true;
}

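/*
 * Drop our reference on a pinned page.  Returns 1 if a page that counts
 * against locked memory was released (so callers can adjust accounting),
 * 0 for reserved/invalid pfns that were never accounted.
 */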
static int put_pfn(unsigned long pfn, int prot)
{
	if (!is_invalid_reserved_pfn(pfn)) {
		struct page *page = pfn_to_page(pfn);
		if (prot & IOMMU_WRITE)
			SetPageDirty(page);
		put_page(page);
		return 1;
	}
	return 0;
}

/* Unmap DMA region */
static long __vfio_dma_do_unmap(struct vfio_iommu *iommu, dma_addr_t iova,
				long npage, int prot)
{
	long i, unlocked = 0;

	for (i = 0; i < npage; i++, iova += PAGE_SIZE) {
		unsigned long pfn;

		pfn = iommu_iova_to_phys(iommu->domain, iova) >> PAGE_SHIFT;
		if (pfn) {
			iommu_unmap(iommu->domain, iova, PAGE_SIZE);
			unlocked += put_pfn(pfn, prot);
		}
	}
	return unlocked;
}

static void vfio_dma_unmap(struct vfio_iommu *iommu, dma_addr_t iova,
			   long npage, int prot)
{
	long unlocked;

	unlocked = __vfio_dma_do_unmap(iommu, iova, npage, prot);
	vfio_lock_acct(-unlocked);
}

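/*
 * Translate a user virtual address to a pfn, pinning the backing page.
 * Normal memory is pinned via get_user_pages_fast(); for VM_PFNMAP vmas
 * (e.g. mmap'd MMIO) we fall back to computing the pfn from the vma and
 * accept it only if it's a reserved/invalid pfn we don't account.
 */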
static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
{
	struct page *page[1];
	struct vm_area_struct *vma;
	int ret = -EFAULT;

	if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
		*pfn = page_to_pfn(page[0]);
		return 0;
	}

	down_read(&current->mm->mmap_sem);

	vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);

	if (vma && vma->vm_flags & VM_PFNMAP) {
		*pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
		if (is_invalid_reserved_pfn(*pfn))
			ret = 0;
	}

	up_read(&current->mm->mmap_sem);

	return ret;
}

/* Map DMA region */
static int __vfio_dma_map(struct vfio_iommu *iommu, dma_addr_t iova,
			  unsigned long vaddr, long npage, int prot)
{
	dma_addr_t start = iova;
	long i, locked = 0;
	int ret;

	/* Verify that pages are not already mapped */
	for (i = 0; i < npage; i++, iova += PAGE_SIZE)
		if (iommu_iova_to_phys(iommu->domain, iova))
			return -EBUSY;

	iova = start;

	if (iommu->cache)
		prot |= IOMMU_CACHE;

	/*
	 * XXX We break mappings into pages and use get_user_pages_fast to
	 * pin the pages in memory. It's been suggested that mlock might
	 * provide a more efficient mechanism, but nothing prevents the
	 * user from munlocking the pages, which could then allow the user
	 * access to random host memory. We also have no guarantee from the
	 * IOMMU API that the iommu driver can unmap sub-pages of previous
	 * mappings. This means we might lose an entire range if a single
	 * page within it is unmapped. Single page mappings are inefficient,
	 * but provide the most flexibility for now.
	 */
	for (i = 0; i < npage; i++, iova += PAGE_SIZE, vaddr += PAGE_SIZE) {
		unsigned long pfn = 0;

		ret = vaddr_get_pfn(vaddr, prot, &pfn);
		if (ret) {
			__vfio_dma_do_unmap(iommu, start, i, prot);
			return ret;
		}

		/*
		 * Only add actual locked pages to accounting
		 * XXX We're effectively marking a page locked for every
		 * IOVA page even though it's possible the user could be
		 * backing multiple IOVAs with the same vaddr. This over-
		 * penalizes the user process, but we currently have no
		 * easy way to do this properly.
		 */
		if (!is_invalid_reserved_pfn(pfn))
			locked++;

		ret = iommu_map(iommu->domain, iova,
				(phys_addr_t)pfn << PAGE_SHIFT,
				PAGE_SIZE, prot);
		if (ret) {
			/* Back out mappings on error */
			put_pfn(pfn, prot);
			__vfio_dma_do_unmap(iommu, start, i, prot);
			return ret;
		}
	}
	vfio_lock_acct(locked);
	return 0;
}

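/*
 * Test whether the IOVA ranges [start1, start1 + size1) and
 * [start2, start2 + size2) overlap.
 */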
static inline bool ranges_overlap(dma_addr_t start1, size_t size1,
				  dma_addr_t start2, size_t size2)
{
	if (start1 < start2)
		return (start2 - start1 < size1);
	else if (start2 < start1)
		return (start1 - start2 < size2);
	return (size1 > 0 && size2 > 0);
}

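/*
 * Return the first tracked vfio_dma whose IOVA range overlaps
 * [start, start + size), or NULL if nothing in the list overlaps.
 */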
static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
				      dma_addr_t start, size_t size)
{
	struct vfio_dma *dma;

	list_for_each_entry(dma, &iommu->dma_list, next) {
		if (ranges_overlap(dma->iova, NPAGE_TO_SIZE(dma->npage),
				   start, size))
			return dma;
	}
	return NULL;
}

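/*
 * Unmap the portion of an existing vfio_dma that overlaps
 * [start, start + size) and fix up the tracking structure: the entry is
 * removed entirely, trimmed at its low or high end, or split in two around
 * the hole.  Returns the number of pages removed, or -ENOMEM if a split
 * was needed but couldn't be allocated.
 */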
static long vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
				    size_t size, struct vfio_dma *dma)
{
	struct vfio_dma *split;
	long npage_lo, npage_hi;

	/* Existing dma region is completely covered, unmap all */
	if (start <= dma->iova &&
	    start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) {
		vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot);
		list_del(&dma->next);
		npage_lo = dma->npage;
		kfree(dma);
		return npage_lo;
	}

	/* Overlap low address of existing range */
	if (start <= dma->iova) {
		size_t overlap;

		overlap = start + size - dma->iova;
		npage_lo = overlap >> PAGE_SHIFT;

		vfio_dma_unmap(iommu, dma->iova, npage_lo, dma->prot);
		dma->iova += overlap;
		dma->vaddr += overlap;
		dma->npage -= npage_lo;
		return npage_lo;
	}

	/* Overlap high address of existing range */
	if (start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) {
		size_t overlap;

		overlap = dma->iova + NPAGE_TO_SIZE(dma->npage) - start;
		npage_hi = overlap >> PAGE_SHIFT;

		vfio_dma_unmap(iommu, start, npage_hi, dma->prot);
		dma->npage -= npage_hi;
		return npage_hi;
	}

	/* Split existing */
	npage_lo = (start - dma->iova) >> PAGE_SHIFT;
	npage_hi = dma->npage - (size >> PAGE_SHIFT) - npage_lo;

	split = kzalloc(sizeof *split, GFP_KERNEL);
	if (!split)
		return -ENOMEM;

	vfio_dma_unmap(iommu, start, size >> PAGE_SHIFT, dma->prot);

	dma->npage = npage_lo;

	split->npage = npage_hi;
	split->iova = start + size;
	split->vaddr = dma->vaddr + NPAGE_TO_SIZE(npage_lo) + size;
	split->prot = dma->prot;
	list_add(&split->next, &iommu->dma_list);
	return size >> PAGE_SHIFT;
}

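/*
 * Handle VFIO_IOMMU_UNMAP_DMA: walk the tracked mappings and remove any
 * portion overlapping the requested IOVA range, stopping early once the
 * requested number of pages has been removed or an error occurs.
 */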
static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
			     struct vfio_iommu_type1_dma_unmap *unmap)
{
	long ret = 0, npage = unmap->size >> PAGE_SHIFT;
	struct vfio_dma *dma, *tmp;
	uint64_t mask;

	mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1;

	if (unmap->iova & mask)
		return -EINVAL;
	if (unmap->size & mask)
		return -EINVAL;

	/* XXX We still break these down into PAGE_SIZE */
	WARN_ON(mask & PAGE_MASK);

	mutex_lock(&iommu->lock);

	list_for_each_entry_safe(dma, tmp, &iommu->dma_list, next) {
		if (ranges_overlap(dma->iova, NPAGE_TO_SIZE(dma->npage),
				   unmap->iova, unmap->size)) {
			ret = vfio_remove_dma_overlap(iommu, unmap->iova,
						      unmap->size, dma);
			if (ret > 0)
				npage -= ret;
			if (ret < 0 || npage == 0)
				break;
		}
	}
	mutex_unlock(&iommu->lock);
	return ret > 0 ? 0 : (int)ret;
}

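/*
 * Handle VFIO_IOMMU_MAP_DMA: validate alignment and the memlock rlimit,
 * pin and map the range page by page, then either merge the result into an
 * abutting tracked region with matching protection and contiguous vaddr,
 * or add a new entry to the dma_list.
 */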
static int vfio_dma_do_map(struct vfio_iommu *iommu,
			   struct vfio_iommu_type1_dma_map *map)
{
	struct vfio_dma *dma, *pdma = NULL;
	dma_addr_t iova = map->iova;
	unsigned long locked, lock_limit, vaddr = map->vaddr;
	size_t size = map->size;
	int ret = 0, prot = 0;
	uint64_t mask;
	long npage;

	mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1;

	/* READ/WRITE from device perspective */
	if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
		prot |= IOMMU_WRITE;
	if (map->flags & VFIO_DMA_MAP_FLAG_READ)
		prot |= IOMMU_READ;

	if (!prot)
		return -EINVAL; /* No READ/WRITE? */

	if (vaddr & mask)
		return -EINVAL;
	if (iova & mask)
		return -EINVAL;
	if (size & mask)
		return -EINVAL;

	/* XXX We still break these down into PAGE_SIZE */
	WARN_ON(mask & PAGE_MASK);

	/* Don't allow IOVA wrap */
	if (iova + size && iova + size < iova)
		return -EINVAL;

	/* Don't allow virtual address wrap */
	if (vaddr + size && vaddr + size < vaddr)
		return -EINVAL;

	npage = size >> PAGE_SHIFT;
	if (!npage)
		return -EINVAL;

	mutex_lock(&iommu->lock);

	if (vfio_find_dma(iommu, iova, size)) {
		ret = -EBUSY;
		goto out_lock;
	}

	/* account for locked pages */
	locked = current->mm->locked_vm + npage;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
		pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
			__func__, rlimit(RLIMIT_MEMLOCK));
		ret = -ENOMEM;
		goto out_lock;
	}

	ret = __vfio_dma_map(iommu, iova, vaddr, npage, prot);
	if (ret)
		goto out_lock;

	/* Check if we abut a region below - nothing below 0 */
	if (iova) {
		dma = vfio_find_dma(iommu, iova - 1, 1);
		if (dma && dma->prot == prot &&
		    dma->vaddr + NPAGE_TO_SIZE(dma->npage) == vaddr) {

			dma->npage += npage;
			iova = dma->iova;
			vaddr = dma->vaddr;
			npage = dma->npage;
			size = NPAGE_TO_SIZE(npage);

			pdma = dma;
		}
	}

	/* Check if we abut a region above - nothing above ~0 + 1 */
	if (iova + size) {
		dma = vfio_find_dma(iommu, iova + size, 1);
		if (dma && dma->prot == prot &&
		    dma->vaddr == vaddr + size) {

			dma->npage += npage;
			dma->iova = iova;
			dma->vaddr = vaddr;

			/*
			 * If merged above and below, remove previously
			 * merged entry.  New entry covers it.
			 */
			if (pdma) {
				list_del(&pdma->next);
				kfree(pdma);
			}
			pdma = dma;
		}
	}

	/* Isolated, new region */
	if (!pdma) {
		dma = kzalloc(sizeof *dma, GFP_KERNEL);
		if (!dma) {
			ret = -ENOMEM;
			vfio_dma_unmap(iommu, iova, npage, prot);
			goto out_lock;
		}

		dma->npage = npage;
		dma->iova = iova;
		dma->vaddr = vaddr;
		dma->prot = prot;
		list_add(&dma->next, &iommu->dma_list);
	}

out_lock:
	mutex_unlock(&iommu->lock);
	return ret;
}

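/*
 * A group may only be attached once per container; reject duplicates,
 * then let the IOMMU API attach the group to our single domain.
 */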
static int vfio_iommu_type1_attach_group(void *iommu_data,
					 struct iommu_group *iommu_group)
{
	struct vfio_iommu *iommu = iommu_data;
	struct vfio_group *group, *tmp;
	int ret;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return -ENOMEM;

	mutex_lock(&iommu->lock);

	list_for_each_entry(tmp, &iommu->group_list, next) {
		if (tmp->iommu_group == iommu_group) {
			mutex_unlock(&iommu->lock);
			kfree(group);
			return -EINVAL;
		}
	}

	/*
	 * TODO: Domains have capabilities that might change as we add
	 * groups (see iommu->cache, currently never set). Check for
	 * them and potentially disallow groups to be attached when it
	 * would change capabilities (ugh).
	 */
	ret = iommu_attach_group(iommu->domain, iommu_group);
	if (ret) {
		mutex_unlock(&iommu->lock);
		kfree(group);
		return ret;
	}

	group->iommu_group = iommu_group;
	list_add(&group->next, &iommu->group_list);

	mutex_unlock(&iommu->lock);

	return 0;
}

static void vfio_iommu_type1_detach_group(void *iommu_data,
					  struct iommu_group *iommu_group)
{
	struct vfio_iommu *iommu = iommu_data;
	struct vfio_group *group;

	mutex_lock(&iommu->lock);

	list_for_each_entry(group, &iommu->group_list, next) {
		if (group->iommu_group == iommu_group) {
			iommu_detach_group(iommu->domain, iommu_group);
			list_del(&group->next);
			kfree(group);
			break;
		}
	}

	mutex_unlock(&iommu->lock);
}

static void *vfio_iommu_type1_open(unsigned long arg)
{
	struct vfio_iommu *iommu;

	if (arg != VFIO_TYPE1_IOMMU)
		return ERR_PTR(-EINVAL);

	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
	if (!iommu)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&iommu->group_list);
	INIT_LIST_HEAD(&iommu->dma_list);
	mutex_init(&iommu->lock);

	/*
	 * Wish we didn't have to know about bus_type here.
	 */
	iommu->domain = iommu_domain_alloc(&pci_bus_type);
	if (!iommu->domain) {
		kfree(iommu);
		return ERR_PTR(-EIO);
	}

	/*
	 * Wish we could specify required capabilities rather than create
	 * a domain, see what comes out and hope it doesn't change along
	 * the way. Fortunately we know interrupt remapping is global for
	 * our iommus.
	 */
	if (!allow_unsafe_interrupts &&
	    !iommu_domain_has_cap(iommu->domain, IOMMU_CAP_INTR_REMAP)) {
		pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
			__func__);
		iommu_domain_free(iommu->domain);
		kfree(iommu);
		return ERR_PTR(-EPERM);
	}

	return iommu;
}

static void vfio_iommu_type1_release(void *iommu_data)
{
	struct vfio_iommu *iommu = iommu_data;
	struct vfio_group *group, *group_tmp;
	struct vfio_dma *dma, *dma_tmp;

	list_for_each_entry_safe(group, group_tmp, &iommu->group_list, next) {
		iommu_detach_group(iommu->domain, group->iommu_group);
		list_del(&group->next);
		kfree(group);
	}

	list_for_each_entry_safe(dma, dma_tmp, &iommu->dma_list, next) {
		vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot);
		list_del(&dma->next);
		kfree(dma);
	}

	iommu_domain_free(iommu->domain);
	iommu->domain = NULL;
	kfree(iommu);
}

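/*
 * ioctl dispatch for the container fd.  Each command copies in only the
 * fields up to its known minimum size (minsz), so the uapi structs can
 * grow in the future while old userspace continues to work.
 */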
static long vfio_iommu_type1_ioctl(void *iommu_data,
				   unsigned int cmd, unsigned long arg)
{
	struct vfio_iommu *iommu = iommu_data;
	unsigned long minsz;

	if (cmd == VFIO_CHECK_EXTENSION) {
		switch (arg) {
		case VFIO_TYPE1_IOMMU:
			return 1;
		default:
			return 0;
		}
	} else if (cmd == VFIO_IOMMU_GET_INFO) {
		struct vfio_iommu_type1_info info;

		minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = 0;

		info.iova_pgsizes = iommu->domain->ops->pgsize_bitmap;

		return copy_to_user((void __user *)arg, &info, minsz);

	} else if (cmd == VFIO_IOMMU_MAP_DMA) {
		struct vfio_iommu_type1_dma_map map;
		uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&map, (void __user *)arg, minsz))
			return -EFAULT;

		if (map.argsz < minsz || map.flags & ~mask)
			return -EINVAL;

		return vfio_dma_do_map(iommu, &map);

	} else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
		struct vfio_iommu_type1_dma_unmap unmap;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);

		if (copy_from_user(&unmap, (void __user *)arg, minsz))
			return -EFAULT;

		if (unmap.argsz < minsz || unmap.flags)
			return -EINVAL;

		return vfio_dma_do_unmap(iommu, &unmap);
	}

	return -ENOTTY;
}

static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
	.name		= "vfio-iommu-type1",
	.owner		= THIS_MODULE,
	.open		= vfio_iommu_type1_open,
	.release	= vfio_iommu_type1_release,
	.ioctl		= vfio_iommu_type1_ioctl,
	.attach_group	= vfio_iommu_type1_attach_group,
	.detach_group	= vfio_iommu_type1_detach_group,
};

static int __init vfio_iommu_type1_init(void)
{
	if (!iommu_present(&pci_bus_type))
		return -ENODEV;

	return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1);
}

static void __exit vfio_iommu_type1_cleanup(void)
{
	vfio_unregister_iommu_driver(&vfio_iommu_driver_ops_type1);
}

module_init(vfio_iommu_type1_init);
module_exit(vfio_iommu_type1_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);