/*
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */
14 #include <linux/device.h>
15 #include <linux/eventfd.h>
16 #include <linux/file.h>
17 #include <linux/interrupt.h>
18 #include <linux/iommu.h>
19 #include <linux/module.h>
20 #include <linux/mutex.h>
21 #include <linux/notifier.h>
22 #include <linux/pci.h>
23 #include <linux/pm_runtime.h>
24 #include <linux/slab.h>
25 #include <linux/types.h>
26 #include <linux/uaccess.h>
27 #include <linux/vfio.h>
29 #include "vfio_pci_private.h"
/*
 * Module identification strings.  They are consumed by the
 * MODULE_VERSION / MODULE_AUTHOR / MODULE_DESCRIPTION declarations
 * in this file.
 */
#define DRIVER_VERSION	"0.2"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO PCI - User Level meta-driver"
35 static bool nointxmask
;
36 module_param_named(nointxmask
, nointxmask
, bool, S_IRUGO
| S_IWUSR
);
37 MODULE_PARM_DESC(nointxmask
,
38 "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");
40 static int vfio_pci_enable(struct vfio_pci_device
*vdev
)
42 struct pci_dev
*pdev
= vdev
->pdev
;
47 ret
= pci_enable_device(pdev
);
51 vdev
->reset_works
= (pci_reset_function(pdev
) == 0);
53 vdev
->pci_saved_state
= pci_store_saved_state(pdev
);
54 if (!vdev
->pci_saved_state
)
55 pr_debug("%s: Couldn't store %s saved state\n",
56 __func__
, dev_name(&pdev
->dev
));
58 ret
= vfio_config_init(vdev
);
60 kfree(vdev
->pci_saved_state
);
61 vdev
->pci_saved_state
= NULL
;
62 pci_disable_device(pdev
);
66 if (likely(!nointxmask
))
67 vdev
->pci_2_3
= pci_intx_mask_supported(pdev
);
69 pci_read_config_word(pdev
, PCI_COMMAND
, &cmd
);
70 if (vdev
->pci_2_3
&& (cmd
& PCI_COMMAND_INTX_DISABLE
)) {
71 cmd
&= ~PCI_COMMAND_INTX_DISABLE
;
72 pci_write_config_word(pdev
, PCI_COMMAND
, cmd
);
75 msix_pos
= pdev
->msix_cap
;
80 pci_read_config_word(pdev
, msix_pos
+ PCI_MSIX_FLAGS
, &flags
);
81 pci_read_config_dword(pdev
, msix_pos
+ PCI_MSIX_TABLE
, &table
);
83 vdev
->msix_bar
= table
& PCI_MSIX_TABLE_BIR
;
84 vdev
->msix_offset
= table
& PCI_MSIX_TABLE_OFFSET
;
85 vdev
->msix_size
= ((flags
& PCI_MSIX_FLAGS_QSIZE
) + 1) * 16;
87 vdev
->msix_bar
= 0xFF;
89 #ifdef CONFIG_VFIO_PCI_VGA
90 if ((pdev
->class >> 8) == PCI_CLASS_DISPLAY_VGA
)
97 static void vfio_pci_disable(struct vfio_pci_device
*vdev
)
99 struct pci_dev
*pdev
= vdev
->pdev
;
102 pci_disable_device(pdev
);
104 vfio_pci_set_irqs_ioctl(vdev
, VFIO_IRQ_SET_DATA_NONE
|
105 VFIO_IRQ_SET_ACTION_TRIGGER
,
106 vdev
->irq_type
, 0, 0, NULL
);
108 vdev
->virq_disabled
= false;
110 vfio_config_free(vdev
);
112 for (bar
= PCI_STD_RESOURCES
; bar
<= PCI_STD_RESOURCE_END
; bar
++) {
113 if (!vdev
->barmap
[bar
])
115 pci_iounmap(pdev
, vdev
->barmap
[bar
]);
116 pci_release_selected_regions(pdev
, 1 << bar
);
117 vdev
->barmap
[bar
] = NULL
;
121 * If we have saved state, restore it. If we can reset the device,
122 * even better. Resetting with current state seems better than
123 * nothing, but saving and restoring current state without reset
126 if (pci_load_and_free_saved_state(pdev
, &vdev
->pci_saved_state
)) {
127 pr_info("%s: Couldn't reload %s saved state\n",
128 __func__
, dev_name(&pdev
->dev
));
130 if (!vdev
->reset_works
)
133 pci_save_state(pdev
);
137 * Disable INTx and MSI, presumably to avoid spurious interrupts
138 * during reset. Stolen from pci_reset_function()
140 pci_write_config_word(pdev
, PCI_COMMAND
, PCI_COMMAND_INTX_DISABLE
);
143 * Try to reset the device. The success of this is dependent on
144 * being able to lock the device, which is not always possible.
146 if (vdev
->reset_works
) {
147 int ret
= pci_try_reset_function(pdev
);
149 pr_warn("%s: Failed to reset device %s (%d)\n",
150 __func__
, dev_name(&pdev
->dev
), ret
);
153 pci_restore_state(pdev
);
156 static void vfio_pci_release(void *device_data
)
158 struct vfio_pci_device
*vdev
= device_data
;
160 if (atomic_dec_and_test(&vdev
->refcnt
))
161 vfio_pci_disable(vdev
);
163 module_put(THIS_MODULE
);
166 static int vfio_pci_open(void *device_data
)
168 struct vfio_pci_device
*vdev
= device_data
;
170 if (!try_module_get(THIS_MODULE
))
173 if (atomic_inc_return(&vdev
->refcnt
) == 1) {
174 int ret
= vfio_pci_enable(vdev
);
176 module_put(THIS_MODULE
);
184 static int vfio_pci_get_irq_count(struct vfio_pci_device
*vdev
, int irq_type
)
186 if (irq_type
== VFIO_PCI_INTX_IRQ_INDEX
) {
188 pci_read_config_byte(vdev
->pdev
, PCI_INTERRUPT_PIN
, &pin
);
192 } else if (irq_type
== VFIO_PCI_MSI_IRQ_INDEX
) {
196 pos
= vdev
->pdev
->msi_cap
;
198 pci_read_config_word(vdev
->pdev
,
199 pos
+ PCI_MSI_FLAGS
, &flags
);
201 return 1 << (flags
& PCI_MSI_FLAGS_QMASK
);
203 } else if (irq_type
== VFIO_PCI_MSIX_IRQ_INDEX
) {
207 pos
= vdev
->pdev
->msix_cap
;
209 pci_read_config_word(vdev
->pdev
,
210 pos
+ PCI_MSIX_FLAGS
, &flags
);
212 return (flags
& PCI_MSIX_FLAGS_QSIZE
) + 1;
214 } else if (irq_type
== VFIO_PCI_ERR_IRQ_INDEX
)
215 if (pci_is_pcie(vdev
->pdev
))
221 static int vfio_pci_count_devs(struct pci_dev
*pdev
, void *data
)
227 struct vfio_pci_fill_info
{
230 struct vfio_pci_dependent_device
*devices
;
233 static int vfio_pci_fill_devs(struct pci_dev
*pdev
, void *data
)
235 struct vfio_pci_fill_info
*fill
= data
;
236 struct iommu_group
*iommu_group
;
238 if (fill
->cur
== fill
->max
)
239 return -EAGAIN
; /* Something changed, try again */
241 iommu_group
= iommu_group_get(&pdev
->dev
);
243 return -EPERM
; /* Cannot reset non-isolated devices */
245 fill
->devices
[fill
->cur
].group_id
= iommu_group_id(iommu_group
);
246 fill
->devices
[fill
->cur
].segment
= pci_domain_nr(pdev
->bus
);
247 fill
->devices
[fill
->cur
].bus
= pdev
->bus
->number
;
248 fill
->devices
[fill
->cur
].devfn
= pdev
->devfn
;
250 iommu_group_put(iommu_group
);
254 struct vfio_pci_group_entry
{
255 struct vfio_group
*group
;
259 struct vfio_pci_group_info
{
261 struct vfio_pci_group_entry
*groups
;
264 static int vfio_pci_validate_devs(struct pci_dev
*pdev
, void *data
)
266 struct vfio_pci_group_info
*info
= data
;
267 struct iommu_group
*group
;
270 group
= iommu_group_get(&pdev
->dev
);
274 id
= iommu_group_id(group
);
276 for (i
= 0; i
< info
->count
; i
++)
277 if (info
->groups
[i
].id
== id
)
280 iommu_group_put(group
);
282 return (i
== info
->count
) ? -EINVAL
: 0;
285 static bool vfio_pci_dev_below_slot(struct pci_dev
*pdev
, struct pci_slot
*slot
)
287 for (; pdev
; pdev
= pdev
->bus
->self
)
288 if (pdev
->bus
== slot
->bus
)
289 return (pdev
->slot
== slot
);
293 struct vfio_pci_walk_info
{
294 int (*fn
)(struct pci_dev
*, void *data
);
296 struct pci_dev
*pdev
;
301 static int vfio_pci_walk_wrapper(struct pci_dev
*pdev
, void *data
)
303 struct vfio_pci_walk_info
*walk
= data
;
305 if (!walk
->slot
|| vfio_pci_dev_below_slot(pdev
, walk
->pdev
->slot
))
306 walk
->ret
= walk
->fn(pdev
, walk
->data
);
311 static int vfio_pci_for_each_slot_or_bus(struct pci_dev
*pdev
,
312 int (*fn
)(struct pci_dev
*,
313 void *data
), void *data
,
316 struct vfio_pci_walk_info walk
= {
317 .fn
= fn
, .data
= data
, .pdev
= pdev
, .slot
= slot
, .ret
= 0,
320 pci_walk_bus(pdev
->bus
, vfio_pci_walk_wrapper
, &walk
);
325 static long vfio_pci_ioctl(void *device_data
,
326 unsigned int cmd
, unsigned long arg
)
328 struct vfio_pci_device
*vdev
= device_data
;
331 if (cmd
== VFIO_DEVICE_GET_INFO
) {
332 struct vfio_device_info info
;
334 minsz
= offsetofend(struct vfio_device_info
, num_irqs
);
336 if (copy_from_user(&info
, (void __user
*)arg
, minsz
))
339 if (info
.argsz
< minsz
)
342 info
.flags
= VFIO_DEVICE_FLAGS_PCI
;
344 if (vdev
->reset_works
)
345 info
.flags
|= VFIO_DEVICE_FLAGS_RESET
;
347 info
.num_regions
= VFIO_PCI_NUM_REGIONS
;
348 info
.num_irqs
= VFIO_PCI_NUM_IRQS
;
350 return copy_to_user((void __user
*)arg
, &info
, minsz
);
352 } else if (cmd
== VFIO_DEVICE_GET_REGION_INFO
) {
353 struct pci_dev
*pdev
= vdev
->pdev
;
354 struct vfio_region_info info
;
356 minsz
= offsetofend(struct vfio_region_info
, offset
);
358 if (copy_from_user(&info
, (void __user
*)arg
, minsz
))
361 if (info
.argsz
< minsz
)
364 switch (info
.index
) {
365 case VFIO_PCI_CONFIG_REGION_INDEX
:
366 info
.offset
= VFIO_PCI_INDEX_TO_OFFSET(info
.index
);
367 info
.size
= pdev
->cfg_size
;
368 info
.flags
= VFIO_REGION_INFO_FLAG_READ
|
369 VFIO_REGION_INFO_FLAG_WRITE
;
371 case VFIO_PCI_BAR0_REGION_INDEX
... VFIO_PCI_BAR5_REGION_INDEX
:
372 info
.offset
= VFIO_PCI_INDEX_TO_OFFSET(info
.index
);
373 info
.size
= pci_resource_len(pdev
, info
.index
);
379 info
.flags
= VFIO_REGION_INFO_FLAG_READ
|
380 VFIO_REGION_INFO_FLAG_WRITE
;
381 if (pci_resource_flags(pdev
, info
.index
) &
382 IORESOURCE_MEM
&& info
.size
>= PAGE_SIZE
)
383 info
.flags
|= VFIO_REGION_INFO_FLAG_MMAP
;
385 case VFIO_PCI_ROM_REGION_INDEX
:
390 info
.offset
= VFIO_PCI_INDEX_TO_OFFSET(info
.index
);
393 /* Report the BAR size, not the ROM size */
394 info
.size
= pci_resource_len(pdev
, info
.index
);
398 /* Is it really there? */
399 io
= pci_map_rom(pdev
, &size
);
404 pci_unmap_rom(pdev
, io
);
406 info
.flags
= VFIO_REGION_INFO_FLAG_READ
;
409 case VFIO_PCI_VGA_REGION_INDEX
:
413 info
.offset
= VFIO_PCI_INDEX_TO_OFFSET(info
.index
);
415 info
.flags
= VFIO_REGION_INFO_FLAG_READ
|
416 VFIO_REGION_INFO_FLAG_WRITE
;
423 return copy_to_user((void __user
*)arg
, &info
, minsz
);
425 } else if (cmd
== VFIO_DEVICE_GET_IRQ_INFO
) {
426 struct vfio_irq_info info
;
428 minsz
= offsetofend(struct vfio_irq_info
, count
);
430 if (copy_from_user(&info
, (void __user
*)arg
, minsz
))
433 if (info
.argsz
< minsz
|| info
.index
>= VFIO_PCI_NUM_IRQS
)
436 switch (info
.index
) {
437 case VFIO_PCI_INTX_IRQ_INDEX
... VFIO_PCI_MSIX_IRQ_INDEX
:
439 case VFIO_PCI_ERR_IRQ_INDEX
:
440 if (pci_is_pcie(vdev
->pdev
))
442 /* pass thru to return error */
447 info
.flags
= VFIO_IRQ_INFO_EVENTFD
;
449 info
.count
= vfio_pci_get_irq_count(vdev
, info
.index
);
451 if (info
.index
== VFIO_PCI_INTX_IRQ_INDEX
)
452 info
.flags
|= (VFIO_IRQ_INFO_MASKABLE
|
453 VFIO_IRQ_INFO_AUTOMASKED
);
455 info
.flags
|= VFIO_IRQ_INFO_NORESIZE
;
457 return copy_to_user((void __user
*)arg
, &info
, minsz
);
459 } else if (cmd
== VFIO_DEVICE_SET_IRQS
) {
460 struct vfio_irq_set hdr
;
464 minsz
= offsetofend(struct vfio_irq_set
, count
);
466 if (copy_from_user(&hdr
, (void __user
*)arg
, minsz
))
469 if (hdr
.argsz
< minsz
|| hdr
.index
>= VFIO_PCI_NUM_IRQS
||
470 hdr
.flags
& ~(VFIO_IRQ_SET_DATA_TYPE_MASK
|
471 VFIO_IRQ_SET_ACTION_TYPE_MASK
))
474 if (!(hdr
.flags
& VFIO_IRQ_SET_DATA_NONE
)) {
476 int max
= vfio_pci_get_irq_count(vdev
, hdr
.index
);
478 if (hdr
.flags
& VFIO_IRQ_SET_DATA_BOOL
)
479 size
= sizeof(uint8_t);
480 else if (hdr
.flags
& VFIO_IRQ_SET_DATA_EVENTFD
)
481 size
= sizeof(int32_t);
485 if (hdr
.argsz
- minsz
< hdr
.count
* size
||
486 hdr
.start
>= max
|| hdr
.start
+ hdr
.count
> max
)
489 data
= memdup_user((void __user
*)(arg
+ minsz
),
492 return PTR_ERR(data
);
495 mutex_lock(&vdev
->igate
);
497 ret
= vfio_pci_set_irqs_ioctl(vdev
, hdr
.flags
, hdr
.index
,
498 hdr
.start
, hdr
.count
, data
);
500 mutex_unlock(&vdev
->igate
);
505 } else if (cmd
== VFIO_DEVICE_RESET
) {
506 return vdev
->reset_works
?
507 pci_try_reset_function(vdev
->pdev
) : -EINVAL
;
509 } else if (cmd
== VFIO_DEVICE_GET_PCI_HOT_RESET_INFO
) {
510 struct vfio_pci_hot_reset_info hdr
;
511 struct vfio_pci_fill_info fill
= { 0 };
512 struct vfio_pci_dependent_device
*devices
= NULL
;
516 minsz
= offsetofend(struct vfio_pci_hot_reset_info
, count
);
518 if (copy_from_user(&hdr
, (void __user
*)arg
, minsz
))
521 if (hdr
.argsz
< minsz
)
526 /* Can we do a slot or bus reset or neither? */
527 if (!pci_probe_reset_slot(vdev
->pdev
->slot
))
529 else if (pci_probe_reset_bus(vdev
->pdev
->bus
))
532 /* How many devices are affected? */
533 ret
= vfio_pci_for_each_slot_or_bus(vdev
->pdev
,
539 WARN_ON(!fill
.max
); /* Should always be at least one */
542 * If there's enough space, fill it now, otherwise return
543 * -ENOSPC and the number of devices affected.
545 if (hdr
.argsz
< sizeof(hdr
) + (fill
.max
* sizeof(*devices
))) {
547 hdr
.count
= fill
.max
;
548 goto reset_info_exit
;
551 devices
= kcalloc(fill
.max
, sizeof(*devices
), GFP_KERNEL
);
555 fill
.devices
= devices
;
557 ret
= vfio_pci_for_each_slot_or_bus(vdev
->pdev
,
562 * If a device was removed between counting and filling,
563 * we may come up short of fill.max. If a device was
564 * added, we'll have a return of -EAGAIN above.
567 hdr
.count
= fill
.cur
;
570 if (copy_to_user((void __user
*)arg
, &hdr
, minsz
))
574 if (copy_to_user((void __user
*)(arg
+ minsz
), devices
,
575 hdr
.count
* sizeof(*devices
)))
582 } else if (cmd
== VFIO_DEVICE_PCI_HOT_RESET
) {
583 struct vfio_pci_hot_reset hdr
;
585 struct vfio_pci_group_entry
*groups
;
586 struct vfio_pci_group_info info
;
588 int i
, count
= 0, ret
= 0;
590 minsz
= offsetofend(struct vfio_pci_hot_reset
, count
);
592 if (copy_from_user(&hdr
, (void __user
*)arg
, minsz
))
595 if (hdr
.argsz
< minsz
|| hdr
.flags
)
598 /* Can we do a slot or bus reset or neither? */
599 if (!pci_probe_reset_slot(vdev
->pdev
->slot
))
601 else if (pci_probe_reset_bus(vdev
->pdev
->bus
))
605 * We can't let userspace give us an arbitrarily large
606 * buffer to copy, so verify how many we think there
607 * could be. Note groups can have multiple devices so
608 * one group per device is the max.
610 ret
= vfio_pci_for_each_slot_or_bus(vdev
->pdev
,
616 /* Somewhere between 1 and count is OK */
617 if (!hdr
.count
|| hdr
.count
> count
)
620 group_fds
= kcalloc(hdr
.count
, sizeof(*group_fds
), GFP_KERNEL
);
621 groups
= kcalloc(hdr
.count
, sizeof(*groups
), GFP_KERNEL
);
622 if (!group_fds
|| !groups
) {
628 if (copy_from_user(group_fds
, (void __user
*)(arg
+ minsz
),
629 hdr
.count
* sizeof(*group_fds
))) {
636 * For each group_fd, get the group through the vfio external
637 * user interface and store the group and iommu ID. This
638 * ensures the group is held across the reset.
640 for (i
= 0; i
< hdr
.count
; i
++) {
641 struct vfio_group
*group
;
642 struct fd f
= fdget(group_fds
[i
]);
648 group
= vfio_group_get_external_user(f
.file
);
651 ret
= PTR_ERR(group
);
655 groups
[i
].group
= group
;
656 groups
[i
].id
= vfio_external_user_iommu_id(group
);
661 /* release reference to groups on error */
663 goto hot_reset_release
;
665 info
.count
= hdr
.count
;
666 info
.groups
= groups
;
669 * Test whether all the affected devices are contained
670 * by the set of groups provided by the user.
672 ret
= vfio_pci_for_each_slot_or_bus(vdev
->pdev
,
673 vfio_pci_validate_devs
,
676 /* User has access, do the reset */
677 ret
= slot
? pci_try_reset_slot(vdev
->pdev
->slot
) :
678 pci_try_reset_bus(vdev
->pdev
->bus
);
681 for (i
--; i
>= 0; i
--)
682 vfio_group_put_external_user(groups
[i
].group
);
691 static ssize_t
vfio_pci_rw(void *device_data
, char __user
*buf
,
692 size_t count
, loff_t
*ppos
, bool iswrite
)
694 unsigned int index
= VFIO_PCI_OFFSET_TO_INDEX(*ppos
);
695 struct vfio_pci_device
*vdev
= device_data
;
697 if (index
>= VFIO_PCI_NUM_REGIONS
)
701 case VFIO_PCI_CONFIG_REGION_INDEX
:
702 return vfio_pci_config_rw(vdev
, buf
, count
, ppos
, iswrite
);
704 case VFIO_PCI_ROM_REGION_INDEX
:
707 return vfio_pci_bar_rw(vdev
, buf
, count
, ppos
, false);
709 case VFIO_PCI_BAR0_REGION_INDEX
... VFIO_PCI_BAR5_REGION_INDEX
:
710 return vfio_pci_bar_rw(vdev
, buf
, count
, ppos
, iswrite
);
712 case VFIO_PCI_VGA_REGION_INDEX
:
713 return vfio_pci_vga_rw(vdev
, buf
, count
, ppos
, iswrite
);
719 static ssize_t
vfio_pci_read(void *device_data
, char __user
*buf
,
720 size_t count
, loff_t
*ppos
)
725 return vfio_pci_rw(device_data
, buf
, count
, ppos
, false);
728 static ssize_t
vfio_pci_write(void *device_data
, const char __user
*buf
,
729 size_t count
, loff_t
*ppos
)
734 return vfio_pci_rw(device_data
, (char __user
*)buf
, count
, ppos
, true);
737 static int vfio_pci_mmap(void *device_data
, struct vm_area_struct
*vma
)
739 struct vfio_pci_device
*vdev
= device_data
;
740 struct pci_dev
*pdev
= vdev
->pdev
;
742 u64 phys_len
, req_len
, pgoff
, req_start
;
745 index
= vma
->vm_pgoff
>> (VFIO_PCI_OFFSET_SHIFT
- PAGE_SHIFT
);
747 if (vma
->vm_end
< vma
->vm_start
)
749 if ((vma
->vm_flags
& VM_SHARED
) == 0)
751 if (index
>= VFIO_PCI_ROM_REGION_INDEX
)
753 if (!(pci_resource_flags(pdev
, index
) & IORESOURCE_MEM
))
756 phys_len
= pci_resource_len(pdev
, index
);
757 req_len
= vma
->vm_end
- vma
->vm_start
;
758 pgoff
= vma
->vm_pgoff
&
759 ((1U << (VFIO_PCI_OFFSET_SHIFT
- PAGE_SHIFT
)) - 1);
760 req_start
= pgoff
<< PAGE_SHIFT
;
762 if (phys_len
< PAGE_SIZE
|| req_start
+ req_len
> phys_len
)
765 if (index
== vdev
->msix_bar
) {
767 * Disallow mmaps overlapping the MSI-X table; users don't
768 * get to touch this directly. We could find somewhere
769 * else to map the overlap, but page granularity is only
770 * a recommendation, not a requirement, so the user needs
771 * to know which bits are real. Requiring them to mmap
772 * around the table makes that clear.
775 /* If neither entirely above nor below, then it overlaps */
776 if (!(req_start
>= vdev
->msix_offset
+ vdev
->msix_size
||
777 req_start
+ req_len
<= vdev
->msix_offset
))
782 * Even though we don't make use of the barmap for the mmap,
783 * we need to request the region and the barmap tracks that.
785 if (!vdev
->barmap
[index
]) {
786 ret
= pci_request_selected_regions(pdev
,
787 1 << index
, "vfio-pci");
791 vdev
->barmap
[index
] = pci_iomap(pdev
, index
, 0);
794 vma
->vm_private_data
= vdev
;
795 vma
->vm_page_prot
= pgprot_noncached(vma
->vm_page_prot
);
796 vma
->vm_pgoff
= (pci_resource_start(pdev
, index
) >> PAGE_SHIFT
) + pgoff
;
798 return remap_pfn_range(vma
, vma
->vm_start
, vma
->vm_pgoff
,
799 req_len
, vma
->vm_page_prot
);
802 static const struct vfio_device_ops vfio_pci_ops
= {
804 .open
= vfio_pci_open
,
805 .release
= vfio_pci_release
,
806 .ioctl
= vfio_pci_ioctl
,
807 .read
= vfio_pci_read
,
808 .write
= vfio_pci_write
,
809 .mmap
= vfio_pci_mmap
,
812 static int vfio_pci_probe(struct pci_dev
*pdev
, const struct pci_device_id
*id
)
815 struct vfio_pci_device
*vdev
;
816 struct iommu_group
*group
;
819 pci_read_config_byte(pdev
, PCI_HEADER_TYPE
, &type
);
820 if ((type
& PCI_HEADER_TYPE
) != PCI_HEADER_TYPE_NORMAL
)
823 group
= iommu_group_get(&pdev
->dev
);
827 vdev
= kzalloc(sizeof(*vdev
), GFP_KERNEL
);
829 iommu_group_put(group
);
834 vdev
->irq_type
= VFIO_PCI_NUM_IRQS
;
835 mutex_init(&vdev
->igate
);
836 spin_lock_init(&vdev
->irqlock
);
837 atomic_set(&vdev
->refcnt
, 0);
839 ret
= vfio_add_group_dev(&pdev
->dev
, &vfio_pci_ops
, vdev
);
841 iommu_group_put(group
);
848 static void vfio_pci_remove(struct pci_dev
*pdev
)
850 struct vfio_pci_device
*vdev
;
852 vdev
= vfio_del_group_dev(&pdev
->dev
);
856 iommu_group_put(pdev
->dev
.iommu_group
);
860 static pci_ers_result_t
vfio_pci_aer_err_detected(struct pci_dev
*pdev
,
861 pci_channel_state_t state
)
863 struct vfio_pci_device
*vdev
;
864 struct vfio_device
*device
;
866 device
= vfio_device_get_from_dev(&pdev
->dev
);
868 return PCI_ERS_RESULT_DISCONNECT
;
870 vdev
= vfio_device_data(device
);
872 vfio_device_put(device
);
873 return PCI_ERS_RESULT_DISCONNECT
;
876 mutex_lock(&vdev
->igate
);
878 if (vdev
->err_trigger
)
879 eventfd_signal(vdev
->err_trigger
, 1);
881 mutex_unlock(&vdev
->igate
);
883 vfio_device_put(device
);
885 return PCI_ERS_RESULT_CAN_RECOVER
;
888 static struct pci_error_handlers vfio_err_handlers
= {
889 .error_detected
= vfio_pci_aer_err_detected
,
892 static struct pci_driver vfio_pci_driver
= {
894 .id_table
= NULL
, /* only dynamic ids */
895 .probe
= vfio_pci_probe
,
896 .remove
= vfio_pci_remove
,
897 .err_handler
= &vfio_err_handlers
,
900 static void __exit
vfio_pci_cleanup(void)
902 pci_unregister_driver(&vfio_pci_driver
);
903 vfio_pci_virqfd_exit();
904 vfio_pci_uninit_perm_bits();
907 static int __init
vfio_pci_init(void)
911 /* Allocate shared config space permission data used by all devices */
912 ret
= vfio_pci_init_perm_bits();
916 /* Start the virqfd cleanup handler */
917 ret
= vfio_pci_virqfd_init();
921 /* Register and scan for devices */
922 ret
= pci_register_driver(&vfio_pci_driver
);
929 vfio_pci_virqfd_exit();
931 vfio_pci_uninit_perm_bits();
935 module_init(vfio_pci_init
);
936 module_exit(vfio_pci_cleanup
);
938 MODULE_VERSION(DRIVER_VERSION
);
939 MODULE_LICENSE("GPL v2");
940 MODULE_AUTHOR(DRIVER_AUTHOR
);
941 MODULE_DESCRIPTION(DRIVER_DESC
);