clocksource: Make watchdog reset lockless
author Thomas Gleixner <tglx@linutronix.de>
Mon, 12 Sep 2011 11:32:23 +0000 (13:32 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Tue, 13 Sep 2011 07:58:29 +0000 (09:58 +0200)
KGDB needs to trylock watchdog_lock when trying to reset the
clocksource watchdog after the system has been stopped, to avoid a
potential deadlock. When the trylock fails, the watchdog is not reset
and usually ends up marking the TSC unstable.

We can be more clever by using an atomic counter and checking it in
the clocksource_watchdog callback. We restart the watchdog whenever
the counter is > 0 and only decrement the counter after we have run
through a full update cycle.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <johnstul@us.ibm.com>
Acked-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1109121326280.2723@ionos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
kernel/time/clocksource.c

index e0980f0d9a0ad2d559b98c12f26317b55044cad1..cf52fda2e0966d005e152ff8747d5d44db5fbd7a 100644 (file)
@@ -186,6 +186,7 @@ static struct timer_list watchdog_timer;
 static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
 static DEFINE_SPINLOCK(watchdog_lock);
 static int watchdog_running;
+static atomic_t watchdog_reset_pending;
 
 static int clocksource_watchdog_kthread(void *data);
 static void __clocksource_change_rating(struct clocksource *cs, int rating);
@@ -247,12 +248,14 @@ static void clocksource_watchdog(unsigned long data)
        struct clocksource *cs;
        cycle_t csnow, wdnow;
        int64_t wd_nsec, cs_nsec;
-       int next_cpu;
+       int next_cpu, reset_pending;
 
        spin_lock(&watchdog_lock);
        if (!watchdog_running)
                goto out;
 
+       reset_pending = atomic_read(&watchdog_reset_pending);
+
        list_for_each_entry(cs, &watchdog_list, wd_list) {
 
                /* Clocksource already marked unstable? */
@@ -268,7 +271,8 @@ static void clocksource_watchdog(unsigned long data)
                local_irq_enable();
 
                /* Clocksource initialized ? */
-               if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
+               if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
+                   atomic_read(&watchdog_reset_pending)) {
                        cs->flags |= CLOCK_SOURCE_WATCHDOG;
                        cs->wd_last = wdnow;
                        cs->cs_last = csnow;
@@ -283,8 +287,11 @@ static void clocksource_watchdog(unsigned long data)
                cs->cs_last = csnow;
                cs->wd_last = wdnow;
 
+               if (atomic_read(&watchdog_reset_pending))
+                       continue;
+
                /* Check the deviation from the watchdog clocksource. */
-               if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+               if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
                        clocksource_unstable(cs, cs_nsec - wd_nsec);
                        continue;
                }
@@ -302,6 +309,13 @@ static void clocksource_watchdog(unsigned long data)
                }
        }
 
+       /*
+        * We only clear the watchdog_reset_pending, when we did a
+        * full cycle through all clocksources.
+        */
+       if (reset_pending)
+               atomic_dec(&watchdog_reset_pending);
+
        /*
         * Cycle through CPUs to check if the CPUs stay synchronized
         * to each other.
@@ -344,23 +358,7 @@ static inline void clocksource_reset_watchdog(void)
 
 static void clocksource_resume_watchdog(void)
 {
-       unsigned long flags;
-
-       /*
-        * We use trylock here to avoid a potential dead lock when
-        * kgdb calls this code after the kernel has been stopped with
-        * watchdog_lock held. When watchdog_lock is held we just
-        * return and accept, that the watchdog might trigger and mark
-        * the monitored clock source (usually TSC) unstable.
-        *
-        * This does not affect the other caller clocksource_resume()
-        * because at this point the kernel is UP, interrupts are
-        * disabled and nothing can hold watchdog_lock.
-        */
-       if (!spin_trylock_irqsave(&watchdog_lock, flags))
-               return;
-       clocksource_reset_watchdog();
-       spin_unlock_irqrestore(&watchdog_lock, flags);
+       atomic_inc(&watchdog_reset_pending);
 }
 
 static void clocksource_enqueue_watchdog(struct clocksource *cs)