xen/pciback: Allocate IRQ handler for device that is shared with guest.
author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tue, 19 Jul 2011 22:56:39 +0000 (18:56 -0400)
committer: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Wed, 20 Jul 2011 00:58:31 +0000 (20:58 -0400)
If the device that is to be shared with a guest uses a level-triggered
interrupt and the IRQ is shared with the initial domain, we need to take
action. Mainly, we install a dummy IRQ handler that will ACK the interrupt
line so that the initial domain does not disable the interrupt line.

This dummy IRQ handler is not enabled when the device's MSI/MSI-X
interrupts are enabled, nor for edge interrupts. It is also not enabled
for level interrupts that are not shared amongst devices. Lastly, if the
user passes all of the PCI devices on the shared line to the guest, then
we won't install the dummy handler either.

There is also SysFS instrumentation to check its state and turn
IRQ ACKing on/off if necessary.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
drivers/xen/xen-pciback/conf_space_capability_msi.c
drivers/xen/xen-pciback/conf_space_header.c
drivers/xen/xen-pciback/pci_stub.c
drivers/xen/xen-pciback/pciback.h
drivers/xen/xen-pciback/pciback_ops.c

index 78f74b1852d4bd85a65d599c7ef154ec347cbff7..d0d2255b5da9d2674325f0ad3bbb943159e17297 100644 (file)
@@ -12,6 +12,7 @@
 int pciback_enable_msi(struct pciback_device *pdev,
                struct pci_dev *dev, struct xen_pci_op *op)
 {
+       struct pciback_dev_data *dev_data;
        int otherend = pdev->xdev->otherend_id;
        int status;
 
@@ -28,21 +29,29 @@ int pciback_enable_msi(struct pciback_device *pdev,
         * the local domain's IRQ number. */
 
        op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
+       dev_data = pci_get_drvdata(dev);
+       if (dev_data)
+               dev_data->ack_intr = 0;
        return 0;
 }
 
+/*
+ * Disable MSI on @dev on behalf of the guest and report the resulting IRQ in
+ * op->value (translated with xen_pirq_from_irq(), or 0 when dev->irq is 0).
+ * Always returns 0.
+ */
 int pciback_disable_msi(struct pciback_device *pdev,
                 struct pci_dev *dev, struct xen_pci_op *op)
 {
+       struct pciback_dev_data *dev_data;
        pci_disable_msi(dev);
 
        op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
+       /*
+        * MSI is off again: set ack_intr so the fake IRQ handler (which only
+        * claims interrupts when both isr_on and ack_intr are set) resumes
+        * ACKing.
+        */
+       dev_data = pci_get_drvdata(dev);
+       if (dev_data)
+               dev_data->ack_intr = 1;
        return 0;
 }
 
 int pciback_enable_msix(struct pciback_device *pdev,
                struct pci_dev *dev, struct xen_pci_op *op)
 {
+       struct pciback_dev_data *dev_data;
        int i, result;
        struct msix_entry *entries;
 
@@ -74,6 +83,9 @@ int pciback_enable_msix(struct pciback_device *pdev,
        kfree(entries);
 
        op->value = result;
+       dev_data = pci_get_drvdata(dev);
+       if (dev_data)
+               dev_data->ack_intr = 0;
 
        return result;
 }
@@ -81,6 +93,7 @@ int pciback_enable_msix(struct pciback_device *pdev,
 int pciback_disable_msix(struct pciback_device *pdev,
                struct pci_dev *dev, struct xen_pci_op *op)
 {
+       struct pciback_dev_data *dev_data;
 
        pci_disable_msix(dev);
 
@@ -89,6 +102,10 @@ int pciback_disable_msix(struct pciback_device *pdev,
         * an undefined IRQ value of zero.
         */
        op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
+       dev_data = pci_get_drvdata(dev);
+       if (dev_data)
+               dev_data->ack_intr = 1;
+
        return 0;
 }
 
index dcd6dd964e3bfdd93e6f0a9cf21deaff59d6a5b4..22ad0f56066988ed8077f6fcda698a1d10f258c6 100644 (file)
@@ -39,8 +39,10 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
 
 static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
 {
+       struct pciback_dev_data *dev_data;
        int err;
 
+       dev_data = pci_get_drvdata(dev);
        if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
                if (unlikely(verbose_request))
                        printk(KERN_DEBUG "pciback: %s: enable\n",
@@ -48,11 +50,15 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
                err = pci_enable_device(dev);
                if (err)
                        return err;
+               if (dev_data)
+                       dev_data->enable_intx = 1;
        } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
                if (unlikely(verbose_request))
                        printk(KERN_DEBUG "pciback: %s: disable\n",
                               pci_name(dev));
                pci_disable_device(dev);
+               if (dev_data)
+                       dev_data->enable_intx = 0;
        }
 
        if (!dev->is_busmaster && is_master_cmd(value)) {
index ac8396d8206b4f72d49706e05d45014011e4392e..c4d1071ebbe64222175c32727825885ec253e6ee 100644 (file)
@@ -21,6 +21,8 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
+#define DRV_NAME       "pciback"
+
 static char *pci_devs_to_hide;
 wait_queue_head_t aer_wait_queue;
 /*Add sem for sync AER handling and pciback remove/reconfigue ops,
@@ -290,13 +292,20 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
         * would need to be called somewhere to free the memory allocated
         * here and then to call kfree(pci_get_drvdata(psdev->dev)).
         */
-       dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
+       dev_data = kzalloc(sizeof(*dev_data) +  strlen(DRV_NAME "[]")
+                               + strlen(pci_name(dev)) + 1, GFP_ATOMIC);
        if (!dev_data) {
                err = -ENOMEM;
                goto out;
        }
        pci_set_drvdata(dev, dev_data);
 
+       /*
+        * Setup name for fake IRQ handler. It will only be enabled
+        * once the device is turned on by the guest.
+        */
+       sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
+
        dev_dbg(&dev->dev, "initializing config\n");
 
        init_waitqueue_head(&aer_wait_queue);
@@ -837,7 +846,7 @@ static struct pci_error_handlers pciback_error_handler = {
  */
 
 static struct pci_driver pciback_pci_driver = {
-       .name = "pciback",
+       .name = DRV_NAME,
        .id_table = pcistub_ids,
        .probe = pcistub_probe,
        .remove = pcistub_remove,
@@ -1029,6 +1038,72 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
 
 DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
 
+/*
+ * Show callback for the "irq_handlers" driver attribute.
+ *
+ * Walks pcistub_devices under pcistub_devices_lock and emits one line per
+ * device that has driver data attached, in the form
+ *
+ *     <pci name>:<on|off>:<ack|not ack>ing:<handled count>
+ *
+ * Output is capped at PAGE_SIZE. Returns the number of bytes written to @buf.
+ */
+static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
+{
+       struct pcistub_device *psdev;
+       struct pciback_dev_data *dev_data;
+       size_t count = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pcistub_devices_lock, flags);
+       list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+               /* Stop once the sysfs page is full. */
+               if (count >= PAGE_SIZE)
+                       break;
+               if (!psdev->dev)
+                       continue;
+               dev_data = pci_get_drvdata(psdev->dev);
+               if (!dev_data)
+                       continue;
+               /* 'handled' is an unsigned long, so print it with %lu. */
+               count +=
+                   scnprintf(buf + count, PAGE_SIZE - count,
+                             "%s:%s:%sing:%lu\n",
+                             pci_name(psdev->dev),
+                             dev_data->isr_on ? "on" : "off",
+                             dev_data->ack_intr ? "ack" : "not ack",
+                             dev_data->handled);
+       }
+       spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+       return count;
+}
+
+DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
+
+/*
+ * Store callback for the "irq_handler_state" driver attribute.
+ *
+ * @buf names a device slot (parsed by str_to_slot()). The isr_on flag of
+ * that device is toggled; when it is switched on, ack_intr is set as well.
+ *
+ * Returns @count on success, the str_to_slot() error if @buf cannot be
+ * parsed, or -ENOENT if no matching stub device (or no driver data for it)
+ * exists.
+ */
+static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
+                                         const char *buf,
+                                         size_t count)
+{
+       struct pcistub_device *psdev;
+       struct pciback_dev_data *dev_data;
+       int domain, bus, slot, func;
+       int err;
+
+       err = str_to_slot(buf, &domain, &bus, &slot, &func);
+       if (err)
+               return err;
+
+       /*
+        * NOTE(review): if pcistub_device_find() hands back a counted
+        * reference, it must be dropped on every path below - confirm
+        * against its definition.
+        */
+       psdev = pcistub_device_find(domain, bus, slot, func);
+       if (!psdev)
+               return -ENOENT;
+
+       dev_data = pci_get_drvdata(psdev->dev);
+       if (!dev_data)
+               return -ENOENT;
+
+       dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
+               dev_data->irq_name, dev_data->isr_on,
+               !dev_data->isr_on);
+
+       dev_data->isr_on = !(dev_data->isr_on);
+       if (dev_data->isr_on)
+               dev_data->ack_intr = 1;
+
+       return count;
+}
+DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
+
 static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
                                 size_t count)
 {
@@ -1168,7 +1243,10 @@ static void pcistub_exit(void)
        driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
        driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
        driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
-
+       driver_remove_file(&pciback_pci_driver.driver,
+                          &driver_attr_irq_handlers);
+       driver_remove_file(&pciback_pci_driver.driver,
+                          &driver_attr_irq_handler_state);
        pci_unregister_driver(&pciback_pci_driver);
 }
 
@@ -1227,6 +1305,12 @@ static int __init pcistub_init(void)
                err = driver_create_file(&pciback_pci_driver.driver,
                                         &driver_attr_permissive);
 
+       if (!err)
+               err = driver_create_file(&pciback_pci_driver.driver,
+                                        &driver_attr_irq_handlers);
+       if (!err)
+               err = driver_create_file(&pciback_pci_driver.driver,
+                                       &driver_attr_irq_handler_state);
        if (err)
                pcistub_exit();
 
index c1e95e88ee9e8a266829ef3be71f51e7448b1094..5c140200a5ea8dcab6259d1a656b5adb43605661 100644 (file)
@@ -45,8 +45,14 @@ struct pciback_device {
 
 struct pciback_dev_data {
        struct list_head config_fields;
-       int permissive;
-       int warned_on_write;
+       unsigned int permissive:1;
+       unsigned int warned_on_write:1;
+       unsigned int enable_intx:1; /* Set when the guest enables the device, cleared on disable/reset */
+       unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
+       unsigned int ack_intr:1; /* .. and ACK-ing */
+       unsigned long handled; /* Count of interrupts claimed by the fake IRQ handler */
+       unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
+       char irq_name[0]; /* pciback[000:04:00.0] */
 };
 
 /* Used by XenBus and pciback_ops.c */
@@ -131,3 +137,6 @@ extern int verbose_request;
 void test_and_schedule_op(struct pciback_device *pdev);
 #endif
 
+/* Handles shared IRQs that can go to both the device domain and the
+ * control domain.
+ *
+ * NOTE(review): no definition of pciback_irq_handler() is visible in this
+ * change; the function added with this signature is named
+ * pciback_control_isr() - confirm which name is intended.
+ */
+void pciback_irq_handler(struct pci_dev *dev, int reset);
+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id);
index 011db675e437927fe3f85fc0f12d9adfb2d22c78..6c398fde7a831ba11e098628b1430a6975026a72 100644 (file)
 int verbose_request;
 module_param(verbose_request, int, 0644);
 
+/* Ensure a device has the fake IRQ handler "turned on/off" and is
+ * ready to be exported. This MUST be run after pciback_reset_device
+ * which does the actual PCI device enable/disable.
+ *
+ * @dev:   device whose driver data tracks the fake handler state.
+ * @reset: non-zero clears enable_intx and ack_intr first, so an installed
+ *         handler is torn down below.
+ *
+ * Does nothing for devices without driver data or for anything that is
+ * not a PCI_HEADER_TYPE_NORMAL device.
+ */
+void pciback_control_isr(struct pci_dev *dev, int reset)
+{
+       struct pciback_dev_data *dev_data;
+       int rc;
+       int enable = 0;
+
+       dev_data = pci_get_drvdata(dev);
+       if (!dev_data)
+               return;
+
+       /* We don't deal with bridges */
+       if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+               return;
+
+       if (reset) {
+               dev_data->enable_intx = 0;
+               dev_data->ack_intr = 0;
+       }
+       /* The desired handler state follows the guest's INTx enable state. */
+       enable =  dev_data->enable_intx;
+
+       /* Asked to disable, but ISR isn't running */
+       if (!enable && !dev_data->isr_on)
+               return;
+
+       /* Squirrel away the IRQs in the dev_data. We need this
+        * b/c when device transitions to MSI, the dev->irq is
+        * overwritten with the MSI vector.
+        */
+       if (enable)
+               dev_data->irq = dev->irq;
+
+       dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
+               dev_data->irq_name,
+               dev_data->irq,
+               pci_is_enabled(dev) ? "on" : "off",
+               dev->msi_enabled ? "MSI" : "",
+               dev->msix_enabled ? "MSI/X" : "",
+               dev_data->isr_on ? "enable" : "disable",
+               enable ? "enable" : "disable");
+
+       if (enable) {
+               /* Shared line: register with IRQF_SHARED, using dev as the cookie. */
+               rc = request_irq(dev_data->irq,
+                               pciback_guest_interrupt, IRQF_SHARED,
+                               dev_data->irq_name, dev);
+               if (rc) {
+                       dev_err(&dev->dev, "%s: failed to install fake IRQ " \
+                               "handler for IRQ %d! (rc:%d)\n",
+                               dev_data->irq_name, dev_data->irq, rc);
+                       /* isr_on and ack_intr are left untouched on failure. */
+                       goto out;
+               }
+       } else {
+               /* Release the handler using the same cookie and the saved IRQ. */
+               free_irq(dev_data->irq, dev);
+               dev_data->irq = 0;
+       }
+       dev_data->isr_on = enable;
+       dev_data->ack_intr = enable;
+out:
+       dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
+               dev_data->irq_name,
+               dev_data->irq,
+               pci_is_enabled(dev) ? "on" : "off",
+               dev->msi_enabled ? "MSI" : "",
+               dev->msix_enabled ? "MSI/X" : "",
+               enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
+                       (dev_data->isr_on ? "failed to disable" : "disabled"));
+}
+
 /* Ensure a device is "turned off" and ready to be exported.
  * (Also see pciback_config_reset to ensure virtual configuration space is
  * ready to be re-exported)
@@ -21,6 +92,8 @@ void pciback_reset_device(struct pci_dev *dev)
 {
        u16 cmd;
 
+       pciback_control_isr(dev, 1 /* reset device */);
+
        /* Disable devices (but not bridges) */
        if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
 #ifdef CONFIG_PCI_MSI
@@ -78,13 +151,18 @@ void pciback_do_op(struct work_struct *data)
        struct pciback_device *pdev =
                container_of(data, struct pciback_device, op_work);
        struct pci_dev *dev;
+       struct pciback_dev_data *dev_data = NULL;
        struct xen_pci_op *op = &pdev->sh_info->op;
+       int test_intx = 0;
 
        dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
 
        if (dev == NULL)
                op->err = XEN_PCI_ERR_dev_not_found;
        else {
+               dev_data = pci_get_drvdata(dev);
+               if (dev_data)
+                       test_intx = dev_data->enable_intx;
                switch (op->cmd) {
                case XEN_PCI_OP_conf_read:
                        op->err = pciback_config_read(dev,
@@ -113,6 +191,11 @@ void pciback_do_op(struct work_struct *data)
                        break;
                }
        }
+       if (!op->err && dev && dev_data) {
+               /* Transition detected */
+               if ((dev_data->enable_intx != test_intx))
+                       pciback_control_isr(dev, 0 /* no reset */);
+       }
        /* Tell the driver domain that we're done. */
        wmb();
        clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
@@ -137,3 +220,22 @@ irqreturn_t pciback_handle_event(int irq, void *dev_id)
 
        return IRQ_HANDLED;
 }
+/*
+ * Fake interrupt handler installed on a line shared with a guest-owned
+ * device (@dev_id is the struct pci_dev).
+ *
+ * Claims the interrupt (IRQ_HANDLED) only while both isr_on and ack_intr
+ * are set; otherwise returns IRQ_NONE. Every 1000th claimed interrupt,
+ * xen_test_irq_shared() is consulted and, if it returns non-zero, ACKing
+ * is switched off for subsequent interrupts.
+ */
+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
+{
+       struct pci_dev *pdev = dev_id;
+       struct pciback_dev_data *data = pci_get_drvdata(pdev);
+
+       if (!data->isr_on || !data->ack_intr)
+               return IRQ_NONE;
+
+       data->handled++;
+
+       /* Only re-check the sharing state once per 1000 interrupts. */
+       if ((data->handled % 1000) != 0)
+               return IRQ_HANDLED;
+
+       if (xen_test_irq_shared(irq)) {
+               printk(KERN_INFO "%s IRQ line is not shared "
+                       "with other domains. Turning ISR off\n",
+                        data->irq_name);
+               data->ack_intr = 0;
+       }
+       return IRQ_HANDLED;
+}