powerpc/xive: guest exploitation of the XIVE interrupt controller
authorCédric Le Goater <clg@kaod.org>
Wed, 30 Aug 2017 19:46:11 +0000 (21:46 +0200)
committerMichael Ellerman <mpe@ellerman.id.au>
Sat, 2 Sep 2017 11:02:35 +0000 (21:02 +1000)
This is the framework for using XIVE in a PowerVM guest. The support
is very similar to the native one in a much simpler form.

Each source is associated with an Event State Buffer (ESB). This is a
two bit state machine which is used to trigger events. The bits are
named "P" (pending) and "Q" (queued) and can be controlled by MMIO.
The Guest OS registers event (or notification) queues on which the HW
will post event data for a target to notify.

Instead of OPAL calls, a set of hypervisor calls is used to configure
the interrupt sources and the event/notification queues of the guest:

 - H_INT_GET_SOURCE_INFO

   used to obtain the address of the MMIO page of the Event State
   Buffer (PQ bits) entry associated with the source.

 - H_INT_SET_SOURCE_CONFIG

   assigns a source to a "target".

 - H_INT_GET_SOURCE_CONFIG

   determines which "target" and "priority" are assigned to a source

 - H_INT_GET_QUEUE_INFO

   returns the address of the notification management page associated
   with the specified "target" and "priority".

 - H_INT_SET_QUEUE_CONFIG

   sets or resets the event queue for a given "target" and "priority".
   It is also used to set the notification config associated with the
   queue, only unconditional notification for the moment.  Reset is
   performed with a queue size of 0 and queueing is disabled in that
   case.

 - H_INT_GET_QUEUE_CONFIG

   returns the queue settings for a given "target" and "priority".

 - H_INT_RESET

   resets all of the partition's interrupt exploitation structures to
   their initial state, losing all configuration set via the hcalls
   H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.

 - H_INT_SYNC

   issues a synchronisation on a source to make sure all
   notifications have reached their queue.

As for XICS, the XIVE interface for the guest is described in the
device tree under the "interrupt-controller" node. A couple of new
properties are specific to XIVE :

 - "reg"

   contains the base address and size of the thread interrupt
   management areas (TIMA), also called rings, for the User level and
   for the Guest OS level. Only the Guest OS level is taken into
   account today.

 - "ibm,xive-eq-sizes"

   the size of the event queues. One cell per size supported, contains
   log2 of size, in ascending order.

 - "ibm,xive-lisn-ranges"

   the interrupt number ranges assigned to the guest. These are
   allocated using a simple bitmap.

and also :

 - "/ibm,plat-res-int-priorities"

   contains a list of priorities that the hypervisor has reserved for
   its own use.

Tested with a QEMU XIVE model for pseries and with the Power hypervisor.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/xive.h
arch/powerpc/platforms/pseries/Kconfig
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/kexec.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/pseries/smp.c
arch/powerpc/sysdev/xive/Kconfig
arch/powerpc/sysdev/xive/Makefile
arch/powerpc/sysdev/xive/common.c
arch/powerpc/sysdev/xive/spapr.c [new file with mode: 0644]

index 57d38b504ff773facfeaee601cd5bab8e2d9796c..3d34dc0869f687a731ed97ad42eb4c47c579f11b 100644 (file)
 #define H_RESIZE_HPT_COMMIT    0x370
 #define H_REGISTER_PROC_TBL    0x37C
 #define H_SIGNAL_SYS_RESET     0x380
-#define MAX_HCALL_OPCODE       H_SIGNAL_SYS_RESET
+#define H_INT_GET_SOURCE_INFO   0x3A8
+#define H_INT_SET_SOURCE_CONFIG 0x3AC
+#define H_INT_GET_SOURCE_CONFIG 0x3B0
+#define H_INT_GET_QUEUE_INFO    0x3B4
+#define H_INT_SET_QUEUE_CONFIG  0x3B8
+#define H_INT_GET_QUEUE_CONFIG  0x3BC
+#define H_INT_SET_OS_REPORTING_LINE 0x3C0
+#define H_INT_GET_OS_REPORTING_LINE 0x3C4
+#define H_INT_ESB               0x3C8
+#define H_INT_SYNC              0x3CC
+#define H_INT_RESET             0x3D0
+#define MAX_HCALL_OPCODE       H_INT_RESET
 
 /* H_VIOCTL functions */
 #define H_GET_VIOA_DUMP_SIZE   0x01
index c23ff4389ca236c43994fe4279b7f151e80fe5e7..473f133a8555fe8e918e78f7424bdea2b6d21deb 100644 (file)
@@ -110,11 +110,13 @@ extern bool __xive_enabled;
 
 static inline bool xive_enabled(void) { return __xive_enabled; }
 
+extern bool xive_spapr_init(void);
 extern bool xive_native_init(void);
 extern void xive_smp_probe(void);
 extern int  xive_smp_prepare_cpu(unsigned int cpu);
 extern void xive_smp_setup_cpu(void);
 extern void xive_smp_disable_cpu(void);
+extern void xive_teardown_cpu(void);
 extern void xive_kexec_teardown_cpu(int secondary);
 extern void xive_shutdown(void);
 extern void xive_flush_interrupt(void);
@@ -147,6 +149,7 @@ extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
 
 static inline bool xive_enabled(void) { return false; }
 
+static inline bool xive_spapr_init(void) { return false; }
 static inline bool xive_native_init(void) { return false; }
 static inline void xive_smp_probe(void) { }
 extern inline int  xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
index 3a6dfd14f64ba236a1a7eaea7f0aae6e607eab67..71dd69d9ec64578e32292f772d43b6056347c896 100644 (file)
@@ -7,6 +7,7 @@ config PPC_PSERIES
        select PCI
        select PCI_MSI
        select PPC_XICS
+       select PPC_XIVE_SPAPR
        select PPC_ICP_NATIVE
        select PPC_ICP_HV
        select PPC_ICS_RTAS
index b357f1ae0b0a0e4a0fd295a75f00a0e3e8be2160..fc0d8f97c03a2f243a51c9ba27c7043626bd95cc 100644 (file)
@@ -34,6 +34,7 @@
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/plpar_wrappers.h>
 
 #include "pseries.h"
@@ -109,7 +110,10 @@ static void pseries_mach_cpu_die(void)
 
        local_irq_disable();
        idle_task_exit();
-       xics_teardown_cpu();
+       if (xive_enabled())
+               xive_teardown_cpu();
+       else
+               xics_teardown_cpu();
 
        if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
                set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
@@ -174,7 +178,10 @@ static int pseries_cpu_disable(void)
                boot_cpuid = cpumask_any(cpu_online_mask);
 
        /* FIXME: abstract this to not be platform specific later on */
-       xics_migrate_irqs_away();
+       if (xive_enabled())
+               xive_smp_disable_cpu();
+       else
+               xics_migrate_irqs_away();
        return 0;
 }
 
index 6681ac97fb1830d30638676698287d0d3d42fdae..eeb13429d68535e50e2d80f05ce8d89ba01fbc4c 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/firmware.h>
 #include <asm/kexec.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/smp.h>
 #include <asm/plpar_wrappers.h>
 
@@ -51,5 +52,8 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
                }
        }
 
-       xics_kexec_teardown_cpu(secondary);
+       if (xive_enabled())
+               xive_kexec_teardown_cpu(secondary);
+       else
+               xics_kexec_teardown_cpu(secondary);
 }
index b5b650910cf5159746d8af8d01e931af7bd3f7d0..5f1beb8367acaa4562dbc647ad06a1f4c12a8f72 100644 (file)
@@ -57,6 +57,7 @@
 #include <asm/nvram.h>
 #include <asm/pmc.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/ppc-pci.h>
 #include <asm/i8259.h>
 #include <asm/udbg.h>
@@ -176,8 +177,11 @@ static void __init pseries_setup_i8259_cascade(void)
 
 static void __init pseries_init_irq(void)
 {
-       xics_init();
-       pseries_setup_i8259_cascade();
+       /* Try using a XIVE if available, otherwise use a XICS */
+       if (!xive_spapr_init()) {
+               xics_init();
+               pseries_setup_i8259_cascade();
+       }
 }
 
 static void pseries_lpar_enable_pmcs(void)
index 24785f63fb4033ab0a4f6a020934019f3f04b6eb..2e184829e5d496d7ce1ecf0808c3a6185b04395b 100644 (file)
@@ -41,6 +41,7 @@
 #include <asm/vdso_datapage.h>
 #include <asm/cputhreads.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/dbell.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/code-patching.h>
@@ -136,7 +137,9 @@ out:
 
 static void smp_setup_cpu(int cpu)
 {
-       if (cpu != boot_cpuid)
+       if (xive_enabled())
+               xive_smp_setup_cpu();
+       else if (cpu != boot_cpuid)
                xics_setup_cpu();
 
        if (firmware_has_feature(FW_FEATURE_SPLPAR))
@@ -181,6 +184,13 @@ static int smp_pSeries_kick_cpu(int nr)
        return 0;
 }
 
+static int pseries_smp_prepare_cpu(int cpu)
+{
+       if (xive_enabled())
+               return xive_smp_prepare_cpu(cpu);
+       return 0;
+}
+
 static void smp_pseries_cause_ipi(int cpu)
 {
        /* POWER9 should not use this handler */
@@ -211,7 +221,7 @@ static int pseries_cause_nmi_ipi(int cpu)
        return 0;
 }
 
-static __init void pSeries_smp_probe(void)
+static __init void pSeries_smp_probe_xics(void)
 {
        xics_smp_probe();
 
@@ -221,11 +231,24 @@ static __init void pSeries_smp_probe(void)
                smp_ops->cause_ipi = icp_ops->cause_ipi;
 }
 
+static __init void pSeries_smp_probe(void)
+{
+       if (xive_enabled())
+               /*
+                * Don't use P9 doorbells when XIVE is enabled. IPIs
+                * using MMIOs should be faster
+                */
+               xive_smp_probe();
+       else
+               pSeries_smp_probe_xics();
+}
+
 static struct smp_ops_t pseries_smp_ops = {
        .message_pass   = NULL, /* Use smp_muxed_ipi_message_pass */
        .cause_ipi      = NULL, /* Filled at runtime by pSeries_smp_probe() */
        .cause_nmi_ipi  = pseries_cause_nmi_ipi,
        .probe          = pSeries_smp_probe,
+       .prepare_cpu    = pseries_smp_prepare_cpu,
        .kick_cpu       = smp_pSeries_kick_cpu,
        .setup_cpu      = smp_setup_cpu,
        .cpu_bootable   = smp_generic_cpu_bootable,
index 12ccd7373d2f017ea5fca33261a781a5eed42108..3e3e25b5e30d66ace3e9a77c39311c82faf9f3fe 100644 (file)
@@ -9,3 +9,8 @@ config PPC_XIVE_NATIVE
        default n
        select PPC_XIVE
        depends on PPC_POWERNV
+
+config PPC_XIVE_SPAPR
+       bool
+       default n
+       select PPC_XIVE
index 3fab303fc16966302ebc74a4c9ba47b067cb005d..536d6e5706e32c253b0a742065e6c964e914cf3b 100644 (file)
@@ -2,3 +2,4 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 obj-y                          += common.o
 obj-$(CONFIG_PPC_XIVE_NATIVE)  += native.o
+obj-$(CONFIG_PPC_XIVE_SPAPR)   += spapr.o
index a30d6d12b92bdc26e23f6f8bc9f4a76657645760..f1e0a3326d01d7c09e5fd5d93755730db27acf49 100644 (file)
@@ -1372,6 +1372,19 @@ void xive_flush_interrupt(void)
 
 #endif /* CONFIG_SMP */
 
+void xive_teardown_cpu(void)
+{
+       struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+       unsigned int cpu = smp_processor_id();
+
+       /* Set CPPR to 0 to disable flow of interrupts */
+       xc->cppr = 0;
+       out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
+
+       if (xive_ops->teardown_cpu)
+               xive_ops->teardown_cpu(cpu, xc);
+}
+
 void xive_kexec_teardown_cpu(int secondary)
 {
        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
new file mode 100644 (file)
index 0000000..797bb06
--- /dev/null
@@ -0,0 +1,618 @@
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+#include <linux/mm.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/hvcall.h>
+
+#include "xive-internal.h"
+
+static u32 xive_queue_shift;
+
+struct xive_irq_bitmap {
+       unsigned long           *bitmap;
+       unsigned int            base;
+       unsigned int            count;
+       spinlock_t              lock;
+       struct list_head        list;
+};
+
+static LIST_HEAD(xive_irq_bitmaps);
+
+static int xive_irq_bitmap_add(int base, int count)
+{
+       struct xive_irq_bitmap *xibm;
+
+       xibm = kzalloc(sizeof(*xibm), GFP_ATOMIC);
+       if (!xibm)
+               return -ENOMEM;
+
+       spin_lock_init(&xibm->lock);
+       xibm->base = base;
+       xibm->count = count;
+       xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL);
+       list_add(&xibm->list, &xive_irq_bitmaps);
+
+       pr_info("Using IRQ range [%x-%x]", xibm->base,
+               xibm->base + xibm->count - 1);
+       return 0;
+}
+
+static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm)
+{
+       int irq;
+
+       irq = find_first_zero_bit(xibm->bitmap, xibm->count);
+       if (irq != xibm->count) {
+               set_bit(irq, xibm->bitmap);
+               irq += xibm->base;
+       } else {
+               irq = -ENOMEM;
+       }
+
+       return irq;
+}
+
+static int xive_irq_bitmap_alloc(void)
+{
+       struct xive_irq_bitmap *xibm;
+       unsigned long flags;
+       int irq = -ENOENT;
+
+       list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+               spin_lock_irqsave(&xibm->lock, flags);
+               irq = __xive_irq_bitmap_alloc(xibm);
+               spin_unlock_irqrestore(&xibm->lock, flags);
+               if (irq >= 0)
+                       break;
+       }
+       return irq;
+}
+
+static void xive_irq_bitmap_free(int irq)
+{
+       unsigned long flags;
+       struct xive_irq_bitmap *xibm;
+
+       list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+               if ((irq >= xibm->base) && (irq < xibm->base + xibm->count)) {
+                       spin_lock_irqsave(&xibm->lock, flags);
+                       clear_bit(irq - xibm->base, xibm->bitmap);
+                       spin_unlock_irqrestore(&xibm->lock, flags);
+                       break;
+               }
+       }
+}
+
+static long plpar_int_get_source_info(unsigned long flags,
+                                     unsigned long lisn,
+                                     unsigned long *src_flags,
+                                     unsigned long *eoi_page,
+                                     unsigned long *trig_page,
+                                     unsigned long *esb_shift)
+{
+       unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+       long rc;
+
+       rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn);
+       if (rc) {
+               pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc);
+               return rc;
+       }
+
+       *src_flags = retbuf[0];
+       *eoi_page  = retbuf[1];
+       *trig_page = retbuf[2];
+       *esb_shift = retbuf[3];
+
+       pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n",
+               retbuf[0], retbuf[1], retbuf[2], retbuf[3]);
+
+       return 0;
+}
+
+#define XIVE_SRC_SET_EISN (1ull << (63 - 62))
+#define XIVE_SRC_MASK     (1ull << (63 - 63)) /* unused */
+
+static long plpar_int_set_source_config(unsigned long flags,
+                                       unsigned long lisn,
+                                       unsigned long target,
+                                       unsigned long prio,
+                                       unsigned long sw_irq)
+{
+       long rc;
+
+
+       pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n",
+               flags, lisn, target, prio, sw_irq);
+
+
+       rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
+                               target, prio, sw_irq);
+       if (rc) {
+               pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n",
+                      lisn, target, prio, rc);
+               return rc;
+       }
+
+       return 0;
+}
+
+static long plpar_int_get_queue_info(unsigned long flags,
+                                    unsigned long target,
+                                    unsigned long priority,
+                                    unsigned long *esn_page,
+                                    unsigned long *esn_size)
+{
+       unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+       long rc;
+
+       rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target, priority);
+       if (rc) {
+               pr_err("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld failed %ld\n",
+                      target, priority, rc);
+               return rc;
+       }
+
+       *esn_page = retbuf[0];
+       *esn_size = retbuf[1];
+
+       pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n",
+               retbuf[0], retbuf[1]);
+
+       return 0;
+}
+
+#define XIVE_EQ_ALWAYS_NOTIFY (1ull << (63 - 63))
+
+static long plpar_int_set_queue_config(unsigned long flags,
+                                      unsigned long target,
+                                      unsigned long priority,
+                                      unsigned long qpage,
+                                      unsigned long qsize)
+{
+       long rc;
+
+       pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n",
+               flags,  target, priority, qpage, qsize);
+
+       rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
+                               priority, qpage, qsize);
+       if (rc) {
+               pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n",
+                      target, priority, qpage, rc);
+               return  rc;
+       }
+
+       return 0;
+}
+
+static long plpar_int_sync(unsigned long flags, unsigned long lisn)
+{
+       long rc;
+
+       rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
+       if (rc) {
+               pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc);
+               return  rc;
+       }
+
+       return 0;
+}
+
+#define XIVE_SRC_H_INT_ESB     (1ull << (63 - 60)) /* TODO */
+#define XIVE_SRC_LSI           (1ull << (63 - 61))
+#define XIVE_SRC_TRIGGER       (1ull << (63 - 62))
+#define XIVE_SRC_STORE_EOI     (1ull << (63 - 63))
+
+static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
+{
+       long rc;
+       unsigned long flags;
+       unsigned long eoi_page;
+       unsigned long trig_page;
+       unsigned long esb_shift;
+
+       memset(data, 0, sizeof(*data));
+
+       rc = plpar_int_get_source_info(0, hw_irq, &flags, &eoi_page, &trig_page,
+                                      &esb_shift);
+       if (rc)
+               return  -EINVAL;
+
+       if (flags & XIVE_SRC_STORE_EOI)
+               data->flags  |= XIVE_IRQ_FLAG_STORE_EOI;
+       if (flags & XIVE_SRC_LSI)
+               data->flags  |= XIVE_IRQ_FLAG_LSI;
+       data->eoi_page  = eoi_page;
+       data->esb_shift = esb_shift;
+       data->trig_page = trig_page;
+
+       /*
+        * No chip-id for the sPAPR backend. This has an impact how we
+        * pick a target. See xive_pick_irq_target().
+        */
+       data->src_chip = XIVE_INVALID_CHIP_ID;
+
+       data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
+       if (!data->eoi_mmio) {
+               pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
+               return -ENOMEM;
+       }
+
+       /* Full function page supports trigger */
+       if (flags & XIVE_SRC_TRIGGER) {
+               data->trig_mmio = data->eoi_mmio;
+               return 0;
+       }
+
+       data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
+       if (!data->trig_mmio) {
+               pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
+{
+       long rc;
+
+       rc = plpar_int_set_source_config(XIVE_SRC_SET_EISN, hw_irq, target,
+                                        prio, sw_irq);
+
+       return rc == 0 ? 0 : -ENXIO;
+}
+
+/* This can be called multiple time to change a queue configuration */
+static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
+                                  __be32 *qpage, u32 order)
+{
+       s64 rc = 0;
+       unsigned long esn_page;
+       unsigned long esn_size;
+       u64 flags, qpage_phys;
+
+       /* If there's an actual queue page, clean it */
+       if (order) {
+               if (WARN_ON(!qpage))
+                       return -EINVAL;
+               qpage_phys = __pa(qpage);
+       } else {
+               qpage_phys = 0;
+       }
+
+       /* Initialize the rest of the fields */
+       q->msk = order ? ((1u << (order - 2)) - 1) : 0;
+       q->idx = 0;
+       q->toggle = 0;
+
+       rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
+       if (rc) {
+               pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+               rc = -EIO;
+               goto fail;
+       }
+
+       /* TODO: add support for the notification page */
+       q->eoi_phys = esn_page;
+
+       /* Default is to always notify */
+       flags = XIVE_EQ_ALWAYS_NOTIFY;
+
+       /* Configure and enable the queue in HW */
+       rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
+       if (rc) {
+               pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+               rc = -EIO;
+       } else {
+               q->qpage = qpage;
+       }
+fail:
+       return rc;
+}
+
+static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
+                                 u8 prio)
+{
+       struct xive_q *q = &xc->queue[prio];
+       __be32 *qpage;
+
+       qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
+       if (IS_ERR(qpage))
+               return PTR_ERR(qpage);
+
+       return xive_spapr_configure_queue(cpu, q, prio, qpage,
+                                         xive_queue_shift);
+}
+
+static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
+                                 u8 prio)
+{
+       struct xive_q *q = &xc->queue[prio];
+       unsigned int alloc_order;
+       long rc;
+
+       rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+       if (rc)
+               pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+
+       alloc_order = xive_alloc_order(xive_queue_shift);
+       free_pages((unsigned long)q->qpage, alloc_order);
+       q->qpage = NULL;
+}
+
+static bool xive_spapr_match(struct device_node *node)
+{
+       /* Ignore cascaded controllers for the moment */
+       return 1;
+}
+
+#ifdef CONFIG_SMP
+static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+       int irq = xive_irq_bitmap_alloc();
+
+       if (irq < 0) {
+               pr_err("Failed to allocate IPI on CPU %d\n", cpu);
+               return -ENXIO;
+       }
+
+       xc->hw_ipi = irq;
+       return 0;
+}
+
+static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+       xive_irq_bitmap_free(xc->hw_ipi);
+}
+#endif /* CONFIG_SMP */
+
+static void xive_spapr_shutdown(void)
+{
+       long rc;
+
+       rc = plpar_hcall_norets(H_INT_RESET, 0);
+       if (rc)
+               pr_err("H_INT_RESET failed %ld\n", rc);
+}
+
+/*
+ * Perform an "ack" cycle on the current thread. Grab the pending
+ * active priorities and update the CPPR to the most favored one.
+ */
+static void xive_spapr_update_pending(struct xive_cpu *xc)
+{
+       u8 nsr, cppr;
+       u16 ack;
+
+       /*
+        * Perform the "Acknowledge O/S to Register" cycle.
+        *
+        * Let's speedup the access to the TIMA using the raw I/O
+        * accessor as we don't need the synchronisation routine of
+        * the higher level ones
+        */
+       ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+       /* Synchronize subsequent queue accesses */
+       mb();
+
+       /*
+        * Grab the CPPR and the "NSR" field which indicates the source
+        * of the interrupt (if any)
+        */
+       cppr = ack & 0xff;
+       nsr = ack >> 8;
+
+       if (nsr & TM_QW1_NSR_EO) {
+               if (cppr == 0xff)
+                       return;
+               /* Mark the priority pending */
+               xc->pending_prio |= 1 << cppr;
+
+               /*
+                * A new interrupt should never have a CPPR less favored
+                * than our current one.
+                */
+               if (cppr >= xc->cppr)
+                       pr_err("CPU %d odd ack CPPR, got %d at %d\n",
+                              smp_processor_id(), cppr, xc->cppr);
+
+               /* Update our idea of what the CPPR is */
+               xc->cppr = cppr;
+       }
+}
+
+static void xive_spapr_eoi(u32 hw_irq)
+{
+       /* Not used */;
+}
+
+static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+       /* Only some debug on the TIMA settings */
+       pr_debug("(HW value: %08x %08x %08x)\n",
+                in_be32(xive_tima + TM_QW1_OS + TM_WORD0),
+                in_be32(xive_tima + TM_QW1_OS + TM_WORD1),
+                in_be32(xive_tima + TM_QW1_OS + TM_WORD2));
+}
+
+static void xive_spapr_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+       /* Nothing to do */;
+}
+
+static void xive_spapr_sync_source(u32 hw_irq)
+{
+       /* Specs are unclear on what this is doing */
+       plpar_int_sync(0, hw_irq);
+}
+
+static const struct xive_ops xive_spapr_ops = {
+       .populate_irq_data      = xive_spapr_populate_irq_data,
+       .configure_irq          = xive_spapr_configure_irq,
+       .setup_queue            = xive_spapr_setup_queue,
+       .cleanup_queue          = xive_spapr_cleanup_queue,
+       .match                  = xive_spapr_match,
+       .shutdown               = xive_spapr_shutdown,
+       .update_pending         = xive_spapr_update_pending,
+       .eoi                    = xive_spapr_eoi,
+       .setup_cpu              = xive_spapr_setup_cpu,
+       .teardown_cpu           = xive_spapr_teardown_cpu,
+       .sync_source            = xive_spapr_sync_source,
+#ifdef CONFIG_SMP
+       .get_ipi                = xive_spapr_get_ipi,
+       .put_ipi                = xive_spapr_put_ipi,
+#endif /* CONFIG_SMP */
+       .name                   = "spapr",
+};
+
+/*
+ * get max priority from "/ibm,plat-res-int-priorities"
+ */
+static bool xive_get_max_prio(u8 *max_prio)
+{
+       struct device_node *rootdn;
+       const __be32 *reg;
+       u32 len;
+       int prio, found;
+
+       rootdn = of_find_node_by_path("/");
+       if (!rootdn) {
+               pr_err("not root node found !\n");
+               return false;
+       }
+
+       reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len);
+       if (!reg) {
+               pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n");
+               return false;
+       }
+
+       if (len % (2 * sizeof(u32)) != 0) {
+               pr_err("invalid 'ibm,plat-res-int-priorities' property\n");
+               return false;
+       }
+
+       /* HW supports priorities in the range [0-7] and 0xFF is a
+        * wildcard priority used to mask. We scan the ranges reserved
+        * by the hypervisor to find the lowest priority we can use.
+        */
+       found = 0xFF;
+       for (prio = 0; prio < 8; prio++) {
+               int reserved = 0;
+               int i;
+
+               for (i = 0; i < len / (2 * sizeof(u32)); i++) {
+                       int base  = be32_to_cpu(reg[2 * i]);
+                       int range = be32_to_cpu(reg[2 * i + 1]);
+
+                       if (prio >= base && prio < base + range)
+                               reserved++;
+               }
+
+               if (!reserved)
+                       found = prio;
+       }
+
+       if (found == 0xFF) {
+               pr_err("no valid priority found in 'ibm,plat-res-int-priorities'\n");
+               return false;
+       }
+
+       *max_prio = found;
+       return true;
+}
+
+bool xive_spapr_init(void)
+{
+       struct device_node *np;
+       struct resource r;
+       void __iomem *tima;
+       struct property *prop;
+       u8 max_prio;
+       u32 val;
+       u32 len;
+       const __be32 *reg;
+       int i;
+
+       if (xive_cmdline_disabled)
+               return false;
+
+       pr_devel("%s()\n", __func__);
+       np = of_find_compatible_node(NULL, NULL, "ibm,power-ivpe");
+       if (!np) {
+               pr_devel("not found !\n");
+               return false;
+       }
+       pr_devel("Found %s\n", np->full_name);
+
+       /* Resource 1 is the OS ring TIMA */
+       if (of_address_to_resource(np, 1, &r)) {
+               pr_err("Failed to get thread mgmnt area resource\n");
+               return false;
+       }
+       tima = ioremap(r.start, resource_size(&r));
+       if (!tima) {
+               pr_err("Failed to map thread mgmnt area\n");
+               return false;
+       }
+
+       if (!xive_get_max_prio(&max_prio))
+               return false;
+
+       /* Feed the IRQ number allocator with the ranges given in the DT */
+       reg = of_get_property(np, "ibm,xive-lisn-ranges", &len);
+       if (!reg) {
+               pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n");
+               return false;
+       }
+
+       if (len % (2 * sizeof(u32)) != 0) {
+               pr_err("invalid 'ibm,xive-lisn-ranges' property\n");
+               return false;
+       }
+
+       for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2)
+               xive_irq_bitmap_add(be32_to_cpu(reg[0]),
+                                   be32_to_cpu(reg[1]));
+
+       /* Iterate the EQ sizes and pick one */
+       of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) {
+               xive_queue_shift = val;
+               if (val == PAGE_SHIFT)
+                       break;
+       }
+
+       /* Initialize XIVE core with our backend */
+       if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio))
+               return false;
+
+       pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
+       return true;
+}