Merge branch 'android-4.14-spl-topic' into exynos9609
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6bcb854909c0b6f1563469ae3517fa1f89c43268..98a67a3f19de9cfad3aec5a7b73280e291289ae8 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Detect hard and soft lockups on a system
  *
@@ -31,7 +32,8 @@
 
 static DEFINE_MUTEX(watchdog_mutex);
 
-#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) \
+       || defined(CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU)
 # define WATCHDOG_DEFAULT      (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
 # define NMI_WATCHDOG_DEFAULT  1
 #else
@@ -45,13 +47,16 @@ int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
 int __read_mostly soft_watchdog_user_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
 int __read_mostly nmi_watchdog_available;
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU)
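+/* treated as a boolean: nonzero means the other-cpu detector is available */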
+int __read_mostly watchdog_other_cpu_available = WATCHDOG_DEFAULT;
+#endif
 
 struct cpumask watchdog_allowed_mask __read_mostly;
 
 struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU)
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
@@ -104,6 +109,11 @@ __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
  * softlockup watchdog threads start and stop. The arch must select the
  * SOFTLOCKUP_DETECTOR Kconfig.
  */
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
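+/* These are defined at the bottom of this file for the other-cpu detector. */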
+static int watchdog_nmi_enable(unsigned int cpu);
+static void watchdog_nmi_disable(unsigned int cpu);
+#else
 int __weak watchdog_nmi_enable(unsigned int cpu)
 {
        hardlockup_detector_perf_enable();
@@ -114,6 +124,7 @@ void __weak watchdog_nmi_disable(unsigned int cpu)
 {
        hardlockup_detector_perf_disable();
 }
+#endif
 
 /* Return 0, if a NMI watchdog is available. Error code otherwise */
 int __weak __init watchdog_nmi_probe(void)
@@ -155,6 +166,10 @@ static void lockup_detector_update_enable(void)
        watchdog_enabled = 0;
        if (!watchdog_user_enabled)
                return;
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU)
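+       /* the other-cpu detector reuses the NMI_WATCHDOG_ENABLED bit */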
+       if (watchdog_other_cpu_available && nmi_watchdog_user_enabled)
+               watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+#endif
        if (nmi_watchdog_available && nmi_watchdog_user_enabled)
                watchdog_enabled |= NMI_WATCHDOG_ENABLED;
        if (soft_watchdog_user_enabled)
@@ -169,8 +184,10 @@ unsigned int __read_mostly softlockup_panic =
 
 static bool softlockup_threads_initialized __read_mostly;
 static u64 __read_mostly sample_period;
+static unsigned long __read_mostly hardlockup_thresh;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
+static DEFINE_PER_CPU(unsigned long, hardlockup_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
@@ -248,12 +265,14 @@ static void set_sample_period(void)
         */
        sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
        watchdog_update_hrtimer_threshold(sample_period);
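+       /* hard lockup threshold: three sample periods, converted to seconds */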
+       hardlockup_thresh = sample_period * 3 / NSEC_PER_SEC;
 }
 
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
        __this_cpu_write(watchdog_touch_ts, get_timestamp());
+       __this_cpu_write(hardlockup_touch_ts, get_timestamp());
 }
 
 /**
@@ -304,6 +323,12 @@ void touch_softlockup_watchdog_sync(void)
        __this_cpu_write(watchdog_touch_ts, 0);
 }
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static void watchdog_check_hardlockup_other_cpu(void);
+#else
+static inline void watchdog_check_hardlockup_other_cpu(void) { }
+#endif
+
 static int is_softlockup(unsigned long touch_ts)
 {
        unsigned long now = get_timestamp();
@@ -341,12 +366,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
        int duration;
        int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
 
+       /*
+        * Try to re-enable exynos-snapshot's log_kevent in case it was
+        * turned off by an earlier RCU stall.
+        */
+       dbg_snapshot_try_enable("log_kevent", NSEC_PER_SEC * 15);
        if (!watchdog_enabled)
                return HRTIMER_NORESTART;
 
        /* kick the hardlockup detector */
        watchdog_interrupt_count();
 
+       /* test for hardlockups on the next cpu */
+       watchdog_check_hardlockup_other_cpu();
+
        /* kick the softlockup detector */
        wake_up_process(__this_cpu_read(softlockup_watchdog));
 
@@ -787,3 +817,134 @@ void __init lockup_detector_init(void)
                nmi_watchdog_available = true;
        lockup_detector_setup();
 }
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static DEFINE_PER_CPU(bool, hard_watchdog_warn);
+static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
+static cpumask_t __read_mostly watchdog_cpus;
+ATOMIC_NOTIFIER_HEAD(hardlockup_notifier_list);
+EXPORT_SYMBOL(hardlockup_notifier_list);
+
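+/*
+ * Each CPU checks the next CPU in watchdog_cpus, forming a ring.  Returns
+ * nr_cpu_ids when @cpu is the only CPU in the mask and nothing can check it.
+ */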
+static unsigned int watchdog_next_cpu(unsigned int cpu)
+{
+       cpumask_t cpus = watchdog_cpus;
+       unsigned int next_cpu;
+
+       next_cpu = cpumask_next(cpu, &cpus);
+       if (next_cpu >= nr_cpu_ids)
+               next_cpu = cpumask_first(&cpus);
+
+       if (next_cpu == cpu)
+               return nr_cpu_ids;
+
+       return next_cpu;
+}
+
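+/*
+ * A CPU is considered hard locked up when its hrtimer interrupt count has
+ * not advanced since the last check and its hardlockup_touch_ts is more
+ * than hardlockup_thresh seconds old.
+ */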
+static int is_hardlockup_other_cpu(unsigned int cpu)
+{
+       unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+
+       if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) {
+               unsigned long now = get_timestamp();
+               unsigned long touch_ts = per_cpu(hardlockup_touch_ts, cpu);
+
+               if (time_after(now, touch_ts) &&
+                               (now - touch_ts >= hardlockup_thresh))
+                       return 1;
+       }
+
+       per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+       return 0;
+}
+
+static void watchdog_check_hardlockup_other_cpu(void)
+{
+       unsigned int next_cpu;
+
+       /*
+        * Test for hardlockups every 3 samples.  The sample period is
+        * watchdog_thresh * 2 / 5, so 3 samples take us slightly past
+        * watchdog_thresh (over by 20%).
+        */
+       if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
+               return;
+
+       /* check for a hardlockup on the next cpu */
+       next_cpu = watchdog_next_cpu(smp_processor_id());
+       if (next_cpu >= nr_cpu_ids)
+               return;
+
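+       /* pairs with the smp_wmb() in watchdog_nmi_enable()/_disable() */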
+       smp_rmb();
+
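+       /* a touched CPU gets one check skipped, e.g. right after onlining */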
+       if (per_cpu(watchdog_nmi_touch, next_cpu)) {
+               per_cpu(watchdog_nmi_touch, next_cpu) = false;
+               return;
+       }
+
+       if (is_hardlockup_other_cpu(next_cpu)) {
+               /* only warn once */
+               if (per_cpu(hard_watchdog_warn, next_cpu))
+                       return;
+
+               if (hardlockup_panic) {
+                       dbg_snapshot_set_hardlockup(hardlockup_panic);
+                       atomic_notifier_call_chain(&hardlockup_notifier_list,
+                                                  0, (void *)&next_cpu);
+                       panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+               } else {
+                       WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+               }
+
+               per_cpu(hard_watchdog_warn, next_cpu) = true;
+       } else {
+               per_cpu(hard_watchdog_warn, next_cpu) = false;
+       }
+}
+
+void touch_nmi_watchdog(void)
+{
+       /*
+        * Use raw_cpu_write() here because some code paths have
+        * preemption enabled.  If preemption is enabled then
+        * interrupts should be enabled too, in which case we don't
+        * have to worry about the watchdog going off.
+        */
+       raw_cpu_write(watchdog_nmi_touch, true);
+       arch_touch_nmi_watchdog();
+       touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+static int watchdog_nmi_enable(unsigned int cpu)
+{
+       /*
+        * The new cpu will be marked online before the first hrtimer interrupt
+        * runs on it.  If another cpu tests for a hardlockup on the new cpu
+        * before it has run its first hrtimer, it will get a false positive.
+        * Touch the watchdog on the new cpu to delay the first check for at
+        * least 3 sampling periods to guarantee one hrtimer has run on the new
+        * cpu.
+        */
+       per_cpu(watchdog_nmi_touch, cpu) = true;
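+       /* make the touch visible before this cpu appears in watchdog_cpus */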
+       smp_wmb();
+       cpumask_set_cpu(cpu, &watchdog_cpus);
+       return 0;
+}
+
+static void watchdog_nmi_disable(unsigned int cpu)
+{
+       unsigned int next_cpu = watchdog_next_cpu(cpu);
+
+       /*
+        * Offlining this cpu will cause the cpu before this one to start
+        * checking the one after this one.  If this cpu just finished checking
+        * the next cpu and updating hrtimer_interrupts_saved, and then the
+        * previous cpu checks it within one sample period, it will trigger a
+        * false positive.  Touch the watchdog on the next cpu to prevent it.
+        */
+       if (next_cpu < nr_cpu_ids)
+               per_cpu(watchdog_nmi_touch, next_cpu) = true;
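+       /* make the touch visible before this cpu leaves watchdog_cpus */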
+       smp_wmb();
+       cpumask_clear_cpu(cpu, &watchdog_cpus);
+}
+#endif