powerpc: Simplify pSeries idle loop
authorPaul Mackerras <paulus@samba.org>
Mon, 27 Mar 2006 04:06:20 +0000 (15:06 +1100)
committerPaul Mackerras <paulus@samba.org>
Mon, 27 Mar 2006 04:06:20 +0000 (15:06 +1100)
Since pSeries only wants to do something different in the idle loop when
there is no work to do, we can simplify the code by implementing
ppc_md.power_save functions instead of complete idle loops.  There are
two versions: one for shared-processor partitions and one for dedicated-
processor partitions.

With this we also do a cede_processor() call on dedicated processor
partitions if the poll_pending() call indicates that the hypervisor
has work it wants to do.

Signed-off-by: Paul Mackerras <paulus@samba.org>
arch/powerpc/platforms/pseries/setup.c
include/asm-powerpc/hvcall.h

index 44d5c7fdcd9704d1b294f070799ae4d7409cf521..213bf983242f53392edd8ff247f9f65bc207c174 100644 (file)
@@ -81,8 +81,8 @@ extern void find_udbg_vterm(void);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
-static void pseries_shared_idle(void);
-static void pseries_dedicated_idle(void);
+static void pseries_shared_idle_sleep(void);
+static void pseries_dedicated_idle_sleep(void);
 
 struct mpic *pSeries_mpic;
 
@@ -236,14 +236,13 @@ static void __init pSeries_setup_arch(void)
                vpa_init(boot_cpuid);
                if (get_lppaca()->shared_proc) {
                        printk(KERN_INFO "Using shared processor idle loop\n");
-                       ppc_md.idle_loop = pseries_shared_idle;
+                       ppc_md.power_save = pseries_shared_idle_sleep;
                } else {
                        printk(KERN_INFO "Using dedicated idle loop\n");
-                       ppc_md.idle_loop = pseries_dedicated_idle;
+                       ppc_md.power_save = pseries_dedicated_idle_sleep;
                }
        } else {
                printk(KERN_INFO "Using default idle loop\n");
-               ppc_md.idle_loop = default_idle;
        }
 
        if (firmware_has_feature(FW_FEATURE_LPAR))
@@ -393,136 +392,87 @@ static int __init pSeries_probe(int platform)
 
 DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
 
-static inline void dedicated_idle_sleep(unsigned int cpu)
-{
-       struct lppaca *plppaca = &lppaca[cpu ^ 1];
-
-       /* Only sleep if the other thread is not idle */
-       if (!(plppaca->idle)) {
-               local_irq_disable();
-
-               /*
-                * We are about to sleep the thread and so wont be polling any
-                * more.
-                */
-               clear_thread_flag(TIF_POLLING_NRFLAG);
-               smp_mb__after_clear_bit();
-
-               /*
-                * SMT dynamic mode. Cede will result in this thread going
-                * dormant, if the partner thread is still doing work.  Thread
-                * wakes up if partner goes idle, an interrupt is presented, or
-                * a prod occurs.  Returning from the cede enables external
-                * interrupts.
-                */
-               if (!need_resched())
-                       cede_processor();
-               else
-                       local_irq_enable();
-               set_thread_flag(TIF_POLLING_NRFLAG);
-       } else {
-               /*
-                * Give the HV an opportunity at the processor, since we are
-                * not doing any work.
-                */
-               poll_pending();
-       }
-}
-
-static void pseries_dedicated_idle(void)
+static void pseries_dedicated_idle_sleep(void)
 { 
        unsigned int cpu = smp_processor_id();
        unsigned long start_snooze;
        unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
-       set_thread_flag(TIF_POLLING_NRFLAG);
-
-       while (1) {
-               /*
-                * Indicate to the HV that we are idle. Now would be
-                * a good time to find other work to dispatch.
-                */
-               get_lppaca()->idle = 1;
-
-               if (!need_resched()) {
-                       start_snooze = get_tb() +
-                               *smt_snooze_delay * tb_ticks_per_usec;
-
-                       while (!need_resched() && !cpu_is_offline(cpu)) {
-                               ppc64_runlatch_off();
-
-                               /*
-                                * Go into low thread priority and possibly
-                                * low power mode.
-                                */
-                               HMT_low();
-                               HMT_very_low();
-
-                               if (*smt_snooze_delay != 0 &&
-                                   get_tb() > start_snooze) {
-                                       HMT_medium();
-                                       dedicated_idle_sleep(cpu);
-                               }
-
-                       }
-
-                       HMT_medium();
-               }
 
-               get_lppaca()->idle = 0;
-               ppc64_runlatch_on();
+       /*
+        * Indicate to the HV that we are idle. Now would be
+        * a good time to find other work to dispatch.
+        */
+       get_lppaca()->idle = 1;
 
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+       /*
+        * We come in with interrupts disabled, and need_resched()
+        * has been checked recently.  If we should poll for a little
+        * while, do so.
+        */
+       if (*smt_snooze_delay) {
+               start_snooze = get_tb() +
+                       *smt_snooze_delay * tb_ticks_per_usec;
+               local_irq_enable();
+               set_thread_flag(TIF_POLLING_NRFLAG);
 
-               if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
-                       cpu_die();
+               while (get_tb() < start_snooze) {
+                       if (need_resched() || cpu_is_offline(cpu))
+                               goto out;
+                       ppc64_runlatch_off();
+                       HMT_low();
+                       HMT_very_low();
+               }
+
+               HMT_medium();
+               clear_thread_flag(TIF_POLLING_NRFLAG);
+               smp_mb();
+               local_irq_disable();
+               if (need_resched() || cpu_is_offline(cpu))
+                       goto out;
        }
+
+       /*
+        * Cede if the other thread is not idle, so that it can
+        * go single-threaded.  If the other thread is idle,
+        * we ask the hypervisor if it has pending work it
+        * wants to do and cede if it does.  Otherwise we keep
+        * polling in order to reduce interrupt latency.
+        *
+        * Doing the cede when the other thread is active will
+        * result in this thread going dormant, meaning the other
+        * thread gets to run in single-threaded (ST) mode, which
+        * is slightly faster than SMT mode with this thread at
+        * very low priority.  The cede enables interrupts, which
+        * doesn't matter here.
+        */
+       if (!lppaca[cpu ^ 1].idle || poll_pending() == H_Pending)
+               cede_processor();
+
+out:
+       HMT_medium();
+       get_lppaca()->idle = 0;
 }
 
-static void pseries_shared_idle(void)
+static void pseries_shared_idle_sleep(void)
 {
        unsigned int cpu = smp_processor_id();
 
-       while (1) {
-               /*
-                * Indicate to the HV that we are idle. Now would be
-                * a good time to find other work to dispatch.
-                */
-               get_lppaca()->idle = 1;
-
-               while (!need_resched() && !cpu_is_offline(cpu)) {
-                       local_irq_disable();
-                       ppc64_runlatch_off();
-
-                       /*
-                        * Yield the processor to the hypervisor.  We return if
-                        * an external interrupt occurs (which are driven prior
-                        * to returning here) or if a prod occurs from another
-                        * processor. When returning here, external interrupts
-                        * are enabled.
-                        *
-                        * Check need_resched() again with interrupts disabled
-                        * to avoid a race.
-                        */
-                       if (!need_resched())
-                               cede_processor();
-                       else
-                               local_irq_enable();
-
-                       HMT_medium();
-               }
-
-               get_lppaca()->idle = 0;
-               ppc64_runlatch_on();
+       /*
+        * Indicate to the HV that we are idle. Now would be
+        * a good time to find other work to dispatch.
+        */
+       get_lppaca()->idle = 1;
 
-               preempt_enable_no_resched();
-               schedule();
-               preempt_disable();
+       /*
+        * Yield the processor to the hypervisor.  We return if
+        * an external interrupt occurs (which are driven prior
+        * to returning here) or if a prod occurs from another
+        * processor. When returning here, external interrupts
+        * are enabled.
+        */
+       cede_processor();
 
-               if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
-                       cpu_die();
-       }
+       get_lppaca()->idle = 0;
 }
 
 static int pSeries_pci_probe_mode(struct pci_bus *bus)
index 38ca9ad6110d6640b37a1b0e06bd8ed50599abbc..b72c04f3f55195ea46a29040430e413c29a33686 100644 (file)
@@ -9,6 +9,7 @@
 #define H_Closed       2       /* Resource closed */
 #define H_Constrained  4       /* Resource request constrained to max allowed */
 #define H_InProgress   14      /* Kind of like busy */
+#define H_Pending      17      /* returned from H_POLL_PENDING */
 #define H_Continue     18      /* Returned from H_Join on success */
 #define H_LongBusyStartRange   9900  /* Start of long busy range */
 #define H_LongBusyOrder1msec   9900  /* Long busy, hint that 1msec is a good time to retry */