Pull xpc-disengage into release branch
author Tony Luck <tony.luck@intel.com>
Fri, 28 Oct 2005 22:27:03 +0000 (15:27 -0700)
committer Tony Luck <tony.luck@intel.com>
Fri, 28 Oct 2005 22:27:03 +0000 (15:27 -0700)
1  2 
arch/ia64/sn/kernel/xpc_main.c
arch/ia64/sn/kernel/xpc_partition.c
include/asm-ia64/sn/xp.h

index ed7c21586e98e44bcc3f815144c4bc40336c33b6,38f2c699192c053f60be99db9f58fbd0969a283a..cece3c7c69be399d39031ab80fcbe697a8b97b38
@@@ -54,6 -54,7 +54,7 @@@
  #include <linux/interrupt.h>
  #include <linux/slab.h>
  #include <linux/delay.h>
+ #include <linux/reboot.h>
  #include <asm/sn/intr.h>
  #include <asm/sn/sn_sal.h>
  #include <asm/uaccess.h>
@@@ -82,11 -83,17 +83,17 @@@ struct device *xpc_chan = &xpc_chan_dbg
  
  /* systune related variables for /proc/sys directories */
  
- static int xpc_hb_min = 1;
- static int xpc_hb_max = 10;
+ static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
+ static int xpc_hb_min_interval = 1;
+ static int xpc_hb_max_interval = 10;
  
- static int xpc_hb_check_min = 10;
- static int xpc_hb_check_max = 120;
+ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
+ static int xpc_hb_check_min_interval = 10;
+ static int xpc_hb_check_max_interval = 120;
+ int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
+ static int xpc_disengage_request_min_timelimit = 0;
+ static int xpc_disengage_request_max_timelimit = 120;
  
  static ctl_table xpc_sys_xpc_hb_dir[] = {
        {
                &proc_dointvec_minmax,
                &sysctl_intvec,
                NULL,
-               &xpc_hb_min, &xpc_hb_max
+               &xpc_hb_min_interval,
+               &xpc_hb_max_interval
        },
        {
                2,
                &proc_dointvec_minmax,
                &sysctl_intvec,
                NULL,
-               &xpc_hb_check_min, &xpc_hb_check_max
+               &xpc_hb_check_min_interval,
+               &xpc_hb_check_max_interval
        },
        {0}
  };
@@@ -124,6 -133,19 +133,19 @@@ static ctl_table xpc_sys_xpc_dir[] = 
                0555,
                xpc_sys_xpc_hb_dir
        },
+       {
+               2,
+               "disengage_request_timelimit",
+               &xpc_disengage_request_timelimit,
+               sizeof(int),
+               0644,
+               NULL,
+               &proc_dointvec_minmax,
+               &sysctl_intvec,
+               NULL,
+               &xpc_disengage_request_min_timelimit,
+               &xpc_disengage_request_max_timelimit
+       },
        {0}
  };
  static ctl_table xpc_sys_dir[] = {
@@@ -148,10 -170,10 +170,10 @@@ static DECLARE_WAIT_QUEUE_HEAD(xpc_act_
  
  static unsigned long xpc_hb_check_timeout;
  
- /* xpc_hb_checker thread exited notification */
+ /* notification that the xpc_hb_checker thread has exited */
  static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);
  
- /* xpc_discovery thread exited notification */
+ /* notification that the xpc_discovery thread has exited */
  static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);
  
  
@@@ -161,6 -183,30 +183,30 @@@ static struct timer_list xpc_hb_timer
  static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
  
  
+ static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
+ static struct notifier_block xpc_reboot_notifier = {
+       .notifier_call = xpc_system_reboot,
+ };
+ /*
+  * Timer function to enforce the timelimit on the partition disengage request.
+  */
+ static void
+ xpc_timeout_partition_disengage_request(unsigned long data)
+ {
+       struct xpc_partition *part = (struct xpc_partition *) data;
+       DBUG_ON(jiffies < part->disengage_request_timeout);
+       (void) xpc_partition_disengaged(part);
+       DBUG_ON(part->disengage_request_timeout != 0);
+       DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
+ }
  /*
   * Notify the heartbeat check thread that an IRQ has been received.
   */
@@@ -214,12 -260,6 +260,6 @@@ xpc_hb_checker(void *ignore
  
        while (!(volatile int) xpc_exiting) {
  
-               /* wait for IRQ or timeout */
-               (void) wait_event_interruptible(xpc_act_IRQ_wq,
-                           (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
-                                       jiffies >= xpc_hb_check_timeout ||
-                                               (volatile int) xpc_exiting));
                dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
                        "been received\n",
                        (int) (xpc_hb_check_timeout - jiffies),
                }
  
  
+               /* check for outstanding IRQs */
                new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
                if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
                        force_IRQ = 0;
                        xpc_hb_check_timeout = jiffies +
                                           (xpc_hb_check_interval * HZ);
                }
+               /* wait for IRQ or timeout */
+               (void) wait_event_interruptible(xpc_act_IRQ_wq,
+                           (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
+                                       jiffies >= xpc_hb_check_timeout ||
+                                               (volatile int) xpc_exiting));
        }
  
        dev_dbg(xpc_part, "heartbeat checker is exiting\n");
  
  
-       /* mark this thread as inactive */
+       /* mark this thread as having exited */
        up(&xpc_hb_checker_exited);
        return 0;
  }
@@@ -282,7 -329,7 +329,7 @@@ xpc_initiate_discovery(void *ignore
  
        dev_dbg(xpc_part, "discovery thread is exiting\n");
  
-       /* mark this thread as inactive */
+       /* mark this thread as having exited */
        up(&xpc_discovery_exited);
        return 0;
  }
@@@ -309,7 -356,7 +356,7 @@@ xpc_make_first_contact(struct xpc_parti
                        "partition %d\n", XPC_PARTID(part));
  
                /* wait a 1/4 of a second or so */
-               msleep_interruptible(250);
+               (void) msleep_interruptible(250);
  
                if (part->act_state == XPC_P_DEACTIVATING) {
                        return part->reason;
@@@ -336,7 -383,8 +383,8 @@@ static voi
  xpc_channel_mgr(struct xpc_partition *part)
  {
        while (part->act_state != XPC_P_DEACTIVATING ||
-                               atomic_read(&part->nchannels_active) > 0) {
+                       atomic_read(&part->nchannels_active) > 0 ||
+                                       !xpc_partition_disengaged(part)) {
  
                xpc_process_channel_activity(part);
  
                                (volatile u64) part->local_IPI_amo != 0 ||
                                ((volatile u8) part->act_state ==
                                                        XPC_P_DEACTIVATING &&
-                               atomic_read(&part->nchannels_active) == 0)));
+                               atomic_read(&part->nchannels_active) == 0 &&
+                               xpc_partition_disengaged(part))));
                atomic_set(&part->channel_mgr_requests, 1);
  
                // >>> Does it need to wakeup periodically as well? In case we
@@@ -482,7 -531,7 +531,7 @@@ xpc_activating(void *__partid
                return 0;
        }
  
-       XPC_ALLOW_HB(partid, xpc_vars);
+       xpc_allow_hb(partid, xpc_vars);
        xpc_IPI_send_activated(part);
  
  
         */
        (void) xpc_partition_up(part);
  
+       xpc_disallow_hb(partid, xpc_vars);
        xpc_mark_partition_inactive(part);
  
        if (part->reason == xpcReactivating) {
@@@ -670,6 -720,7 +720,7 @@@ xpc_daemonize_kthread(void *args
        struct xpc_partition *part = &xpc_partitions[partid];
        struct xpc_channel *ch;
        int n_needed;
+       unsigned long irq_flags;
  
  
        daemonize("xpc%02dc%d", partid, ch_number);
        ch = &part->channels[ch_number];
  
        if (!(ch->flags & XPC_C_DISCONNECTING)) {
-               DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
  
                /* let registerer know that connection has been established */
  
-               if (atomic_read(&ch->kthreads_assigned) == 1) {
+               spin_lock_irqsave(&ch->lock, irq_flags);
+               if (!(ch->flags & XPC_C_CONNECTCALLOUT)) {
+                       ch->flags |= XPC_C_CONNECTCALLOUT;
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
                        xpc_connected_callout(ch);
  
                        /*
                                        !(ch->flags & XPC_C_DISCONNECTING)) {
                                xpc_activate_kthreads(ch, n_needed);
                        }
+               } else {
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
                }
  
                xpc_kthread_waitmsgs(part, ch);
        }
  
-       if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
-                       ((ch->flags & XPC_C_CONNECTCALLOUT) ||
-                               (ch->reason != xpcUnregistering &&
-                                       ch->reason != xpcOtherUnregistering))) {
-               xpc_disconnected_callout(ch);
+       if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
+               spin_lock_irqsave(&ch->lock, irq_flags);
+               if ((ch->flags & XPC_C_CONNECTCALLOUT) &&
+                               !(ch->flags & XPC_C_DISCONNECTCALLOUT)) {
+                       ch->flags |= XPC_C_DISCONNECTCALLOUT;
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+                       xpc_disconnecting_callout(ch);
+               } else {
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+               }
+               if (atomic_dec_return(&part->nchannels_engaged) == 0) {
+                       xpc_mark_partition_disengaged(part);
+                       xpc_IPI_send_disengage(part);
+               }
        }
  
  
@@@ -740,12 -806,33 +806,33 @@@ xpc_create_kthreads(struct xpc_channel 
        unsigned long irq_flags;
        pid_t pid;
        u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
+       struct xpc_partition *part = &xpc_partitions[ch->partid];
  
  
        while (needed-- > 0) {
+               /*
+                * The following is done on behalf of the newly created
+                * kthread. That kthread is responsible for doing the
+                * counterpart to the following before it exits.
+                */
+               (void) xpc_part_ref(part);
+               xpc_msgqueue_ref(ch);
+               if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+                   atomic_inc_return(&part->nchannels_engaged) == 1) {
+                       xpc_mark_partition_engaged(part);
+               }
                pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
                if (pid < 0) {
                        /* the fork failed */
+                       if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+                           atomic_dec_return(&part->nchannels_engaged) == 0) {
+                               xpc_mark_partition_disengaged(part);
+                               xpc_IPI_send_disengage(part);
+                       }
+                       xpc_msgqueue_deref(ch);
+                       xpc_part_deref(part);
  
                        if (atomic_read(&ch->kthreads_assigned) <
                                                ch->kthreads_idle_limit) {
                        break;
                }
  
-               /*
-                * The following is done on behalf of the newly created
-                * kthread. That kthread is responsible for doing the
-                * counterpart to the following before it exits.
-                */
-               (void) xpc_part_ref(&xpc_partitions[ch->partid]);
-               xpc_msgqueue_ref(ch);
-               atomic_inc(&ch->kthreads_assigned);
                ch->kthreads_created++; // >>> temporary debug only!!!
        }
  }
  void
  xpc_disconnect_wait(int ch_number)
  {
+       unsigned long irq_flags;
        partid_t partid;
        struct xpc_partition *part;
        struct xpc_channel *ch;
+       int wakeup_channel_mgr;
  
  
        /* now wait for all callouts to the caller's function to cease */
        for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
                part = &xpc_partitions[partid];
  
-               if (xpc_part_ref(part)) {
-                       ch = &part->channels[ch_number];
+               if (!xpc_part_ref(part)) {
+                       continue;
+               }
  
- // >>> how do we keep from falling into the window between our check and going
- // >>> down and coming back up where sema is re-inited?
-                       if (ch->flags & XPC_C_SETUP) {
-                               (void) down(&ch->teardown_sema);
-                       }
+               ch = &part->channels[ch_number];
  
+               if (!(ch->flags & XPC_C_WDISCONNECT)) {
                        xpc_part_deref(part);
+                       continue;
+               }
+               (void) down(&ch->wdisconnect_sema);
+               spin_lock_irqsave(&ch->lock, irq_flags);
+               DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+               wakeup_channel_mgr = 0;
+               if (ch->delayed_IPI_flags) {
+                       if (part->act_state != XPC_P_DEACTIVATING) {
+                               spin_lock(&part->IPI_lock);
+                               XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+                                       ch->number, ch->delayed_IPI_flags);
+                               spin_unlock(&part->IPI_lock);
+                               wakeup_channel_mgr = 1;
+                       }
+                       ch->delayed_IPI_flags = 0;
                }
+               ch->flags &= ~XPC_C_WDISCONNECT;
+               spin_unlock_irqrestore(&ch->lock, irq_flags);
+               if (wakeup_channel_mgr) {
+                       xpc_wakeup_channel_mgr(part);
+               }
+               xpc_part_deref(part);
        }
  }
  
  
  static void
- xpc_do_exit(void)
+ xpc_do_exit(enum xpc_retval reason)
  {
        partid_t partid;
        int active_part_count;
        struct xpc_partition *part;
+       unsigned long printmsg_time;
  
  
-       /* now it's time to eliminate our heartbeat */
-       del_timer_sync(&xpc_hb_timer);
-       xpc_vars->heartbeating_to_mask = 0;
-       /* indicate to others that our reserved page is uninitialized */
-       xpc_rsvd_page->vars_pa = 0;
-       /*
-        * Ignore all incoming interrupts. Without interupts the heartbeat
-        * checker won't activate any new partitions that may come up.
-        */
-       free_irq(SGI_XPC_ACTIVATE, NULL);
+       /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
+       DBUG_ON(xpc_exiting == 1);
  
        /*
-        * Cause the heartbeat checker and the discovery threads to exit.
-        * We don't want them attempting to activate new partitions as we
-        * try to deactivate the existing ones.
+        * Let the heartbeat checker thread and the discovery thread
+        * (if one is running) know that they should exit. Also wake up
+        * the heartbeat checker thread in case it's sleeping.
         */
        xpc_exiting = 1;
        wake_up_interruptible(&xpc_act_IRQ_wq);
  
-       /* wait for the heartbeat checker thread to mark itself inactive */
-       down(&xpc_hb_checker_exited);
+       /* ignore all incoming interrupts */
+       free_irq(SGI_XPC_ACTIVATE, NULL);
  
-       /* wait for the discovery thread to mark itself inactive */
+       /* wait for the discovery thread to exit */
        down(&xpc_discovery_exited);
  
+       /* wait for the heartbeat checker thread to exit */
+       down(&xpc_hb_checker_exited);
  
-       msleep_interruptible(300);
+       /* sleep for a 1/3 of a second or so */
+       (void) msleep_interruptible(300);
  
  
        /* wait for all partitions to become inactive */
  
+       printmsg_time = jiffies;
        do {
                active_part_count = 0;
  
                for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
                        part = &xpc_partitions[partid];
-                       if (part->act_state != XPC_P_INACTIVE) {
-                               active_part_count++;
  
-                               XPC_DEACTIVATE_PARTITION(part, xpcUnloading);
+                       if (xpc_partition_disengaged(part) &&
+                                       part->act_state == XPC_P_INACTIVE) {
+                               continue;
                        }
+                       active_part_count++;
+                       XPC_DEACTIVATE_PARTITION(part, reason);
                }
  
-               if (active_part_count)
-                       msleep_interruptible(300);
-       } while (active_part_count > 0);
+               if (active_part_count == 0) {
+                       break;
+               }
  
+               if (jiffies >= printmsg_time) {
+                       dev_info(xpc_part, "waiting for partitions to "
+                               "deactivate/disengage, active count=%d, remote "
+                               "engaged=0x%lx\n", active_part_count,
+                               xpc_partition_engaged(1UL << partid));
+                       printmsg_time = jiffies +
+                                       (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+               }
+               /* sleep for a 1/3 of a second or so */
+               (void) msleep_interruptible(300);
+       } while (1);
+       DBUG_ON(xpc_partition_engaged(-1UL));
+       /* indicate to others that our reserved page is uninitialized */
+       xpc_rsvd_page->vars_pa = 0;
+       /* now it's time to eliminate our heartbeat */
+       del_timer_sync(&xpc_hb_timer);
+       DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
+       /* take ourselves off of the reboot_notifier_list */
+       (void) unregister_reboot_notifier(&xpc_reboot_notifier);
  
        /* close down protections for IPI operations */
        xpc_restrict_IPI_ops();
  }
  
  
+ /*
+  * This function is called when the system is being rebooted.
+  */
+ static int
+ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+ {
+       enum xpc_retval reason;
+       switch (event) {
+       case SYS_RESTART:
+               reason = xpcSystemReboot;
+               break;
+       case SYS_HALT:
+               reason = xpcSystemHalt;
+               break;
+       case SYS_POWER_OFF:
+               reason = xpcSystemPoweroff;
+               break;
+       default:
+               reason = xpcSystemGoingDown;
+       }
+       xpc_do_exit(reason);
+       return NOTIFY_DONE;
+ }
  int __init
  xpc_init(void)
  {
        pid_t pid;
  
  
 +      if (!ia64_platform_is("sn2")) {
 +              return -ENODEV;
 +      }
 +
        /*
         * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
-        * both a partition's reserved page and its XPC variables. Its size was
-        * based on the size of a reserved page. So we need to ensure that the
-        * XPC variables will fit as well.
+        * various portions of a partition's reserved page. Its size is based
+        * on the size of the reserved page header and part_nasids mask. So we
+        * need to ensure that the other items will fit as well.
         */
-       if (XPC_VARS_ALIGNED_SIZE > XPC_RSVD_PAGE_ALIGNED_SIZE) {
+       if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
                dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
                return -EPERM;
        }
                spin_lock_init(&part->act_lock);
                part->act_state = XPC_P_INACTIVE;
                XPC_SET_REASON(part, 0, 0);
+               init_timer(&part->disengage_request_timer);
+               part->disengage_request_timer.function =
+                               xpc_timeout_partition_disengage_request;
+               part->disengage_request_timer.data = (unsigned long) part;
                part->setup_state = XPC_P_UNSET;
                init_waitqueue_head(&part->teardown_wq);
                atomic_set(&part->references, 0);
        }
  
  
+       /* add ourselves to the reboot_notifier_list */
+       ret = register_reboot_notifier(&xpc_reboot_notifier);
+       if (ret != 0) {
+               dev_warn(xpc_part, "can't register reboot notifier\n");
+       }
        /*
         * Set the beating to other partitions into motion.  This is
         * the last requirement for other partitions' discovery to
                /* indicate to others that our reserved page is uninitialized */
                xpc_rsvd_page->vars_pa = 0;
  
+               /* take ourselves off of the reboot_notifier_list */
+               (void) unregister_reboot_notifier(&xpc_reboot_notifier);
                del_timer_sync(&xpc_hb_timer);
                free_irq(SGI_XPC_ACTIVATE, NULL);
                xpc_restrict_IPI_ops();
                /* mark this new thread as a non-starter */
                up(&xpc_discovery_exited);
  
-               xpc_do_exit();
+               xpc_do_exit(xpcUnloading);
                return -EBUSY;
        }
  
@@@ -1043,7 -1217,7 +1221,7 @@@ module_init(xpc_init)
  void __exit
  xpc_exit(void)
  {
-       xpc_do_exit();
+       xpc_do_exit(xpcUnloading);
  }
  module_exit(xpc_exit);
  
@@@ -1060,3 -1234,7 +1238,7 @@@ module_param(xpc_hb_check_interval, int
  MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
                "heartbeat checks.");
  
+ module_param(xpc_disengage_request_timelimit, int, 0);
+ MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
+               "for disengage request to complete.");
index 72ef330fb78494922cc0dc7b6bf2c625a55aeff8,ce5a37ff4388f706cfa46ee746d261437dedbf24..581e113d2d375df458f40f9c60c9d4cd79d6c8b3
@@@ -44,16 -44,19 +44,19 @@@ static u64 xpc_sh2_IPI_access3
  
  
  /* original protection values for each node */
 -u64 xpc_prot_vec[MAX_COMPACT_NODES];
 +u64 xpc_prot_vec[MAX_NUMNODES];
  
  
- /* this partition's reserved page */
+ /* this partition's reserved page pointers */
  struct xpc_rsvd_page *xpc_rsvd_page;
- /* this partition's XPC variables (within the reserved page) */
+ static u64 *xpc_part_nasids;
+ static u64 *xpc_mach_nasids;
  struct xpc_vars *xpc_vars;
  struct xpc_vars_part *xpc_vars_part;
  
+ static int xp_nasid_mask_bytes;       /* actual size in bytes of nasid mask */
+ static int xp_nasid_mask_words;       /* actual size in words of nasid mask */
  
  /*
   * For performance reasons, each entry of xpc_partitions[] is cacheline
@@@ -65,20 -68,16 +68,16 @@@ struct xpc_partition xpc_partitions[XP_
  
  
  /*
-  * Generic buffer used to store a local copy of the remote partitions
-  * reserved page or XPC variables.
+  * Generic buffer used to store a local copy of portions of a remote
+  * partition's reserved page (either its header and part_nasids mask,
+  * or its vars).
   *
   * xpc_discovery runs only once and is a seperate thread that is
   * very likely going to be processing in parallel with receiving
   * interrupts.
   */
- char ____cacheline_aligned
-               xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
- /* systune related variables */
- int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
- int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
+ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
+                                                       XP_NASID_MASK_BYTES];
  
  
  /*
   * for that nasid. This function returns 0 on any error.
   */
  static u64
- xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
+ xpc_get_rsvd_page_pa(int nasid)
  {
        bte_result_t bte_res;
        s64 status;
        u64 cookie = 0;
        u64 rp_pa = nasid;      /* seed with nasid */
        u64 len = 0;
+       u64 buf = buf;
+       u64 buf_len = 0;
+       void *buf_base = NULL;
  
  
        while (1) {
                        break;
                }
  
-               if (len > buf_size) {
-                       dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len);
-                       status = SALRET_ERROR;
-                       break;
+               if (L1_CACHE_ALIGN(len) > buf_len) {
+                       if (buf_base != NULL) {
+                               kfree(buf_base);
+                       }
+                       buf_len = L1_CACHE_ALIGN(len);
+                       buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
+                                                       GFP_KERNEL, &buf_base);
+                       if (buf_base == NULL) {
+                               dev_err(xpc_part, "unable to kmalloc "
+                                       "len=0x%016lx\n", buf_len);
+                               status = SALRET_ERROR;
+                               break;
+                       }
                }
  
-               bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size,
+               bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
                                        (BTE_NOTIFY | BTE_WACQUIRE), NULL);
                if (bte_res != BTE_SUCCESS) {
                        dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
                }
        }
  
+       if (buf_base != NULL) {
+               kfree(buf_base);
+       }
        if (status != SALRET_OK) {
                rp_pa = 0;
        }
@@@ -141,15 -156,15 +156,15 @@@ xpc_rsvd_page_init(void
  {
        struct xpc_rsvd_page *rp;
        AMO_t *amos_page;
-       u64 rp_pa, next_cl, nasid_array = 0;
+       u64 rp_pa, nasid_array = 0;
        int i, ret;
  
  
        /* get the local reserved page's address */
  
-       rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0),
-                                       (u64) xpc_remote_copy_buffer,
-                                               XPC_RSVD_PAGE_ALIGNED_SIZE);
+       preempt_disable();
+       rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
+       preempt_enable();
        if (rp_pa == 0) {
                dev_err(xpc_part, "SAL failed to locate the reserved page\n");
                return NULL;
  
        rp->version = XPC_RP_VERSION;
  
-       /*
-        * Place the XPC variables on the cache line following the
-        * reserved page structure.
-        */
-       next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE;
-       xpc_vars = (struct xpc_vars *) next_cl;
+       /* establish the actual sizes of the nasid masks */
+       if (rp->SAL_version == 1) {
+               /* SAL_version 1 didn't set the nasids_size field */
+               rp->nasids_size = 128;
+       }
+       xp_nasid_mask_bytes = rp->nasids_size;
+       xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
+       /* setup the pointers to the various items in the reserved page */
+       xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
+       xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
+       xpc_vars = XPC_RP_VARS(rp);
+       xpc_vars_part = XPC_RP_VARS_PART(rp);
  
        /*
         * Before clearing xpc_vars, see if a page of AMOs had been previously
                amos_page = (AMO_t *) TO_AMO((u64) amos_page);
        }
  
+       /* clear xpc_vars */
        memset(xpc_vars, 0, sizeof(struct xpc_vars));
  
-       /*
-        * Place the XPC per partition specific variables on the cache line
-        * following the XPC variables structure.
-        */
-       next_cl += XPC_VARS_ALIGNED_SIZE;
-       memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
-                                                       XP_MAX_PARTITIONS);
-       xpc_vars_part = (struct xpc_vars_part *) next_cl;
-       xpc_vars->vars_part_pa = __pa(next_cl);
        xpc_vars->version = XPC_V_VERSION;
        xpc_vars->act_nasid = cpuid_to_nasid(0);
        xpc_vars->act_phys_cpuid = cpu_physical_id(0);
+       xpc_vars->vars_part_pa = __pa(xpc_vars_part);
+       xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
        xpc_vars->amos_page = amos_page;  /* save for next load of XPC */
  
  
-       /*
-        * Initialize the activation related AMO variables.
-        */
-       xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS);
-       for (i = 1; i < XP_NASID_MASK_WORDS; i++) {
-               xpc_IPI_init(i + XP_MAX_PARTITIONS);
+       /* clear xpc_vars_part */
+       memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
+                                                       XP_MAX_PARTITIONS);
+       /* initialize the activate IRQ related AMO variables */
+       for (i = 0; i < xp_nasid_mask_words; i++) {
+               (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
        }
-       /* export AMO page's physical address to other partitions */
-       xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
+       /* initialize the engaged remote partitions related AMO variables */
+       (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
+       (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
+       /* timestamp of when reserved page was setup by XPC */
+       rp->stamp = CURRENT_TIME;
  
        /*
         * This signifies to the remote partition that our reserved
@@@ -387,6 -408,11 +408,11 @@@ xpc_check_remote_hb(void
        remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
  
        for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+               if (xpc_exiting) {
+                       break;
+               }
                if (partid == sn_partition_id) {
                        continue;
                }
                /* pull the remote_hb cache line */
                bres = xp_bte_copy(part->remote_vars_pa,
                                        ia64_tpa((u64) remote_vars),
-                                       XPC_VARS_ALIGNED_SIZE,
+                                       XPC_RP_VARS_SIZE,
                                        (BTE_NOTIFY | BTE_WACQUIRE), NULL);
                if (bres != BTE_SUCCESS) {
                        XPC_DEACTIVATE_PARTITION(part,
  
                if (((remote_vars->heartbeat == part->last_heartbeat) &&
                        (remote_vars->kdb_status == 0)) ||
-                            !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
+                            !xpc_hb_allowed(sn_partition_id, remote_vars)) {
  
                        XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
                        continue;
  
  
  /*
-  * Get a copy of the remote partition's rsvd page.
+  * Get a copy of a portion of the remote partition's rsvd page.
   *
   * remote_rp points to a buffer that is cacheline aligned for BTE copies and
-  * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE.
+  * is large enough to contain a copy of their reserved page header and
+  * part_nasids mask.
   */
  static enum xpc_retval
  xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
-               struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa)
+               struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
  {
        int bres, i;
  
  
        /* get the reserved page's physical address */
  
-       *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
-                                               XPC_RSVD_PAGE_ALIGNED_SIZE);
-       if (*remote_rsvd_page_pa == 0) {
+       *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
+       if (*remote_rp_pa == 0) {
                return xpcNoRsvdPageAddr;
        }
  
  
-       /* pull over the reserved page structure */
+       /* pull over the reserved page header and part_nasids mask */
  
-       bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp),
-                               XPC_RSVD_PAGE_ALIGNED_SIZE,
+       bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
+                               XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
                                (BTE_NOTIFY | BTE_WACQUIRE), NULL);
        if (bres != BTE_SUCCESS) {
                return xpc_map_bte_errors(bres);
  
  
        if (discovered_nasids != NULL) {
-               for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
-                       discovered_nasids[i] |= remote_rp->part_nasids[i];
+               u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
+               for (i = 0; i < xp_nasid_mask_words; i++) {
+                       discovered_nasids[i] |= remote_part_nasids[i];
                }
        }
  
  
  
  /*
-  * Get a copy of the remote partition's XPC variables.
+  * Get a copy of the remote partition's XPC variables from the reserved page.
   *
   * remote_vars points to a buffer that is cacheline aligned for BTE copies and
-  * assumed to be of size XPC_VARS_ALIGNED_SIZE.
+  * assumed to be of size XPC_RP_VARS_SIZE.
   */
  static enum xpc_retval
  xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
        /* pull over the cross partition variables */
  
        bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
-                               XPC_VARS_ALIGNED_SIZE,
+                               XPC_RP_VARS_SIZE,
                                (BTE_NOTIFY | BTE_WACQUIRE), NULL);
        if (bres != BTE_SUCCESS) {
                return xpc_map_bte_errors(bres);
  
  
  /*
-  * Prior code has determine the nasid which generated an IPI.  Inspect
+  * Update the remote partition's info.
+  */
+ static void
+ xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
+               struct timespec *remote_rp_stamp, u64 remote_rp_pa,
+               u64 remote_vars_pa, struct xpc_vars *remote_vars)
+ {
+       /*
+        * Record in 'part' what the caller just learned about the remote
+        * partition: the reserved page version, timestamp and physical
+        * address, the physical address of its XPC variables, and the fields
+        * copied out of 'remote_vars'. Each stored value is echoed through
+        * dev_dbg() right after it is assigned.
+        */
+       part->remote_rp_version = remote_rp_version;
+       /* cast so the argument matches %lx whatever the field's width is */
+       dev_dbg(xpc_part, "  remote_rp_version = 0x%016lx\n",
+               (unsigned long) part->remote_rp_version);
+       part->remote_rp_stamp = *remote_rp_stamp;
+       dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx)\n",
+               part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
+       part->remote_rp_pa = remote_rp_pa;
+       dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+       part->remote_vars_pa = remote_vars_pa;
+       dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
+               part->remote_vars_pa);
+       part->last_heartbeat = remote_vars->heartbeat;
+       dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
+               part->last_heartbeat);
+       part->remote_vars_part_pa = remote_vars->vars_part_pa;
+       dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
+               part->remote_vars_part_pa);
+       part->remote_act_nasid = remote_vars->act_nasid;
+       dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
+               part->remote_act_nasid);
+       part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
+       dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
+               part->remote_act_phys_cpuid);
+       part->remote_amos_page_pa = remote_vars->amos_page_pa;
+       dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
+               part->remote_amos_page_pa);
+       part->remote_vars_version = remote_vars->version;
+       dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
+               part->remote_vars_version);
+ }
+ /*
+  * Prior code has determined the nasid which generated an IPI.  Inspect
   * that nasid to determine if its partition needs to be activated or
   * deactivated.
   *
@@@ -542,8 -620,12 +620,12 @@@ xpc_identify_act_IRQ_req(int nasid
  {
        struct xpc_rsvd_page *remote_rp;
        struct xpc_vars *remote_vars;
-       u64 remote_rsvd_page_pa;
+       u64 remote_rp_pa;
        u64 remote_vars_pa;
+       int remote_rp_version;
+       int reactivate = 0;
+       int stamp_diff;
+       struct timespec remote_rp_stamp = { 0, 0 };
        partid_t partid;
        struct xpc_partition *part;
        enum xpc_retval ret;
  
        remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
  
-       ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa);
+       ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
        if (ret != xpcSuccess) {
                dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
                        "which sent interrupt, reason=%d\n", nasid, ret);
        }
  
        remote_vars_pa = remote_rp->vars_pa;
+       remote_rp_version = remote_rp->version;
+       if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+               remote_rp_stamp = remote_rp->stamp;
+       }
        partid = remote_rp->partid;
        part = &xpc_partitions[partid];
  
                "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
                remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
  
+       if (xpc_partition_disengaged(part) &&
+                                       part->act_state == XPC_P_INACTIVE) {
  
-       if (part->act_state == XPC_P_INACTIVE) {
+               xpc_update_partition_info(part, remote_rp_version,
+                                       &remote_rp_stamp, remote_rp_pa,
+                                       remote_vars_pa, remote_vars);
  
-               part->remote_rp_pa = remote_rsvd_page_pa;
-               dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n",
-                       part->remote_rp_pa);
+               if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+                       if (xpc_partition_disengage_requested(1UL << partid)) {
+                               /*
+                                * Other side is waiting on us to disengage,
+                                * even though we already have.
+                                */
+                               return;
+                       }
+               } else {
+                       /* other side doesn't support disengage requests */
+                       xpc_clear_partition_disengage_request(1UL << partid);
+               }
  
-               part->remote_vars_pa = remote_vars_pa;
-               dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
-                       part->remote_vars_pa);
+               xpc_activate_partition(part);
+               return;
+       }
  
-               part->last_heartbeat = remote_vars->heartbeat;
-               dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
-                       part->last_heartbeat);
+       DBUG_ON(part->remote_rp_version == 0);
+       DBUG_ON(part->remote_vars_version == 0);
+       if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
+               DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
+                                                       remote_vars_version));
+               if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+                       DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+                                                               version));
+                       /* see if the other side rebooted */
+                       if (part->remote_amos_page_pa ==
+                               remote_vars->amos_page_pa &&
+                                       xpc_hb_allowed(sn_partition_id,
+                                                               remote_vars)) {
+                               /* doesn't look that way, so ignore the IPI */
+                               return;
+                       }
+               }
  
-               part->remote_vars_part_pa = remote_vars->vars_part_pa;
-               dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
-                       part->remote_vars_part_pa);
+               /*
+                * Other side rebooted and previous XPC didn't support the
+                * disengage request, so we don't need to do anything special.
+                */
  
-               part->remote_act_nasid = remote_vars->act_nasid;
-               dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
-                       part->remote_act_nasid);
+               xpc_update_partition_info(part, remote_rp_version,
+                                               &remote_rp_stamp, remote_rp_pa,
+                                               remote_vars_pa, remote_vars);
+               part->reactivate_nasid = nasid;
+               XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+               return;
+       }
  
-               part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
-               dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
-                       part->remote_act_phys_cpuid);
+       DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
  
-               part->remote_amos_page_pa = remote_vars->amos_page_pa;
-               dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
-                       part->remote_amos_page_pa);
+       if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+               DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
  
-               xpc_activate_partition(part);
+               /*
+                * Other side rebooted and previous XPC did support the
+                * disengage request, but the new one doesn't.
+                */
+               xpc_clear_partition_engaged(1UL << partid);
+               xpc_clear_partition_disengage_request(1UL << partid);
  
-       } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa ||
-                       !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
+               xpc_update_partition_info(part, remote_rp_version,
+                                               &remote_rp_stamp, remote_rp_pa,
+                                               remote_vars_pa, remote_vars);
+               reactivate = 1;
+       } else {
+               DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
  
+               stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
+                                                       &remote_rp_stamp);
+               if (stamp_diff != 0) {
+                       DBUG_ON(stamp_diff >= 0);
+                       /*
+                        * Other side rebooted and the previous XPC did support
+                        * the disengage request, as does the new one.
+                        */
+                       DBUG_ON(xpc_partition_engaged(1UL << partid));
+                       DBUG_ON(xpc_partition_disengage_requested(1UL <<
+                                                               partid));
+                       xpc_update_partition_info(part, remote_rp_version,
+                                               &remote_rp_stamp, remote_rp_pa,
+                                               remote_vars_pa, remote_vars);
+                       reactivate = 1;
+               }
+       }
+       if (!xpc_partition_disengaged(part)) {
+               /* still waiting on other side to disengage from us */
+               return;
+       }
+       if (reactivate) {
                part->reactivate_nasid = nasid;
                XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+       } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
+                       xpc_partition_disengage_requested(1UL << partid)) {
+               XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
        }
  }
  
@@@ -643,14 -802,17 +802,17 @@@ xpc_identify_act_IRQ_sender(void
        u64 nasid;                      /* remote nasid */
        int n_IRQs_detected = 0;
        AMO_t *act_amos;
-       struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
  
  
-       act_amos = xpc_vars->act_amos;
+       act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
  
  
        /* scan through act AMO variable looking for non-zero entries */
-       for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
+       for (word = 0; word < xp_nasid_mask_words; word++) {
+               if (xpc_exiting) {
+                       break;
+               }
  
                nasid_mask = xpc_IPI_receive(&act_amos[word]);
                if (nasid_mask == 0) {
                 * remote nasid in our reserved pages machine mask.
                 * This is used in the event of module reload.
                 */
-               rp->mach_nasids[word] |= nasid_mask;
+               xpc_mach_nasids[word] |= nasid_mask;
  
  
                /* locate the nasid(s) which sent interrupts */
  }
  
  
+ /*
+  * See if the other side has responded to a partition disengage request
+  * from us.
+  *
+  * Returns non-zero once xpc_partition_engaged() no longer reports the
+  * partition as engaged, or once a pending disengage request has passed its
+  * timelimit (in which case we clear the engaged state ourselves and treat
+  * the partition as disengaged). Returns zero while a disengage request is
+  * pending and its timelimit has not yet been reached.
+  *
+  * When a pending request completes (either way) the timeout is reset, the
+  * timer is cancelled unless we are running in interrupt context, the
+  * channel manager is woken if the partition is not yet inactive, and the
+  * disengage request is cancelled if the remote side supports it.
+  */
+ int
+ xpc_partition_disengaged(struct xpc_partition *part)
+ {
+       partid_t partid = XPC_PARTID(part);
+       int disengaged;
+       disengaged = (xpc_partition_engaged(1UL << partid) == 0);
+       if (part->disengage_request_timeout) {
+               if (!disengaged) {
+                       /*
+                        * time_before() stays correct across a jiffies
+                        * wraparound; a plain '<' comparison does not.
+                        */
+                       if (time_before(jiffies,
+                                       part->disengage_request_timeout)) {
+                               /* timelimit hasn't been reached yet */
+                               return 0;
+                       }
+                       /*
+                        * Other side hasn't responded to our disengage
+                        * request in a timely fashion, so assume it's dead.
+                        */
+                       xpc_clear_partition_engaged(1UL << partid);
+                       disengaged = 1;
+               }
+               part->disengage_request_timeout = 0;
+               /* cancel the timer function, provided it's not us */
+               if (!in_interrupt()) {
+                       del_singleshot_timer_sync(&part->
+                                                     disengage_request_timer);
+               }
+               DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
+                                       part->act_state != XPC_P_INACTIVE);
+               if (part->act_state != XPC_P_INACTIVE) {
+                       xpc_wakeup_channel_mgr(part);
+               }
+               if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+                       xpc_cancel_partition_disengage_request(part);
+               }
+       }
+       return disengaged;
+ }
  /*
   * Mark specified partition as active.
   */
@@@ -721,7 -932,6 +932,6 @@@ xpc_deactivate_partition(const int line
                                enum xpc_retval reason)
  {
        unsigned long irq_flags;
-       partid_t partid = XPC_PARTID(part);
  
  
        spin_lock_irqsave(&part->act_lock, irq_flags);
  
        spin_unlock_irqrestore(&part->act_lock, irq_flags);
  
-       XPC_DISALLOW_HB(partid, xpc_vars);
+       if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+               xpc_request_partition_disengage(part);
+               xpc_IPI_send_disengage(part);
  
-       dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
-               reason);
+               /* set a timelimit on the disengage request */
+               part->disengage_request_timeout = jiffies +
+                                       (xpc_disengage_request_timelimit * HZ);
+               part->disengage_request_timer.expires =
+                                       part->disengage_request_timeout;
+               add_timer(&part->disengage_request_timer);
+       }
+       dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+               XPC_PARTID(part), reason);
  
-       xpc_partition_down(part, reason);
+       xpc_partition_going_down(part, reason);
  }
  
  
  /*
-  * Mark specified partition as active.
+  * Mark specified partition as inactive.
   */
  void
  xpc_mark_partition_inactive(struct xpc_partition *part)
@@@ -792,9 -1012,10 +1012,10 @@@ xpc_discovery(void
        void *remote_rp_base;
        struct xpc_rsvd_page *remote_rp;
        struct xpc_vars *remote_vars;
-       u64 remote_rsvd_page_pa;
+       u64 remote_rp_pa;
        u64 remote_vars_pa;
        int region;
+       int region_size;
        int max_regions;
        int nasid;
        struct xpc_rsvd_page *rp;
        enum xpc_retval ret;
  
  
-       remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE,
+       remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
+                                               xp_nasid_mask_bytes,
                                                GFP_KERNEL, &remote_rp_base);
        if (remote_rp == NULL) {
                return;
        remote_vars = (struct xpc_vars *) remote_rp;
  
  
-       discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS,
+       discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words,
                                                        GFP_KERNEL);
        if (discovered_nasids == NULL) {
                kfree(remote_rp_base);
                return;
        }
-       memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS);
+       memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words);
  
        rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
  
         * nodes that can comprise an access protection grouping. The access
         * protection is in regards to memory, IOI and IPI.
         */
- //>>> move the next two #defines into either include/asm-ia64/sn/arch.h or
- //>>> include/asm-ia64/sn/addrs.h
- #define SH1_MAX_REGIONS               64
- #define SH2_MAX_REGIONS               256
-       max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS;
+       max_regions = 64;
+       region_size = sn_region_size;
+       switch (region_size) {
+       case 128:
+               max_regions *= 2;
+       case 64:
+               max_regions *= 2;
+       case 32:
+               max_regions *= 2;
+               region_size = 16;
+               DBUG_ON(!is_shub2());
+       }
  
        for (region = 0; region < max_regions; region++) {
  
  
                dev_dbg(xpc_part, "searching region %d\n", region);
  
-               for (nasid = (region * sn_region_size * 2);
-                    nasid < ((region + 1) * sn_region_size * 2);
+               for (nasid = (region * region_size * 2);
+                    nasid < ((region + 1) * region_size * 2);
                     nasid += 2) {
  
                        if ((volatile int) xpc_exiting) {
                        dev_dbg(xpc_part, "checking nasid %d\n", nasid);
  
  
-                       if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) {
+                       if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
                                dev_dbg(xpc_part, "PROM indicates Nasid %d is "
                                        "part of the local partition; skipping "
                                        "region\n", nasid);
                                break;
                        }
  
-                       if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) {
+                       if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
                                dev_dbg(xpc_part, "PROM indicates Nasid %d was "
                                        "not on Numa-Link network at reset\n",
                                        nasid);
                        /* pull over the reserved page structure */
  
                        ret = xpc_get_remote_rp(nasid, discovered_nasids,
-                                             remote_rp, &remote_rsvd_page_pa);
+                                             remote_rp, &remote_rp_pa);
                        if (ret != xpcSuccess) {
                                dev_dbg(xpc_part, "unable to get reserved page "
                                        "from nasid %d, reason=%d\n", nasid,
                                remote_vars->act_nasid,
                                remote_vars->act_phys_cpuid);
  
+                       if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+                                                               version)) {
+                               part->remote_amos_page_pa =
+                                               remote_vars->amos_page_pa;
+                               xpc_mark_partition_disengaged(part);
+                               xpc_cancel_partition_disengage_request(part);
+                       }
                        xpc_IPI_send_activate(remote_vars);
                }
        }
@@@ -974,12 -1211,12 +1211,12 @@@ xpc_initiate_partid_to_nasids(partid_t 
                return xpcPartitionDown;
        }
  
-       part_nasid_pa = part->remote_rp_pa +
-               (u64) &((struct xpc_rsvd_page *) 0)->part_nasids;
+       memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
+       part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
  
        bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
-                               L1_CACHE_ALIGN(XP_NASID_MASK_BYTES),
-                               (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+                       xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  
        return xpc_map_bte_errors(bte_res);
  }
diff --combined include/asm-ia64/sn/xp.h
index 75a2f39c6ac6e310132f2be32b09f9d021f46936,30312be312069b416fa47f47a0f33c33ee97a2b4..49faf8f26430806f6fa19fc39a6d8b65f914fe04
@@@ -49,7 -49,7 +49,7 @@@
   * C-brick nasids, thus the need for bitmaps which don't account for
   * odd-numbered (non C-brick) nasids.
   */
 -#define XP_MAX_PHYSNODE_ID    (MAX_PHYSNODE_ID / 2)
 +#define XP_MAX_PHYSNODE_ID    (MAX_NUMALINK_NODES / 2)
  #define XP_NASID_MASK_BYTES   ((XP_MAX_PHYSNODE_ID + 7) / 8)
  #define XP_NASID_MASK_WORDS   ((XP_MAX_PHYSNODE_ID + 63) / 64)
  
@@@ -217,7 -217,17 +217,17 @@@ enum xpc_retval 
        xpcInvalidPartid,       /* 42: invalid partition ID */
        xpcLocalPartid,         /* 43: local partition ID */
  
-       xpcUnknownReason        /* 44: unknown reason -- must be last in list */
+       xpcOtherGoingDown,      /* 44: other side going down, reason unknown */
+       xpcSystemGoingDown,     /* 45: system is going down, reason unknown */
+       xpcSystemHalt,          /* 46: system is being halted */
+       xpcSystemReboot,        /* 47: system is being rebooted */
+       xpcSystemPoweroff,      /* 48: system is being powered off */
+       xpcDisconnecting,       /* 49: channel disconnecting (closing) */
+       xpcOpenCloseError,      /* 50: channel open/close protocol error */
+       xpcUnknownReason        /* 51: unknown reason -- must be last in list */
  };
  
  
@@@ -342,7 -352,7 +352,7 @@@ typedef void (*xpc_notify_func)(enum xp
   *
   * The 'func' field points to the function to call when aynchronous
   * notification is required for such events as: a connection established/lost,
-  * or an incomming message received, or an error condition encountered. A
+  * or an incoming message received, or an error condition encountered. A
   * non-NULL 'func' field indicates that there is an active registration for
   * the channel.
   */