time/timers: Move all time(r) related files into kernel/time
author Thomas Gleixner <tglx@linutronix.de>
Sun, 22 Jun 2014 10:06:40 +0000 (12:06 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Mon, 23 Jun 2014 09:22:35 +0000 (11:22 +0200)
Except for Kconfig.HZ. That needs a separate treatment.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
17 files changed:
MAINTAINERS
kernel/Makefile
kernel/hrtimer.c [deleted file]
kernel/itimer.c [deleted file]
kernel/posix-cpu-timers.c [deleted file]
kernel/posix-timers.c [deleted file]
kernel/time.c [deleted file]
kernel/time/Makefile
kernel/time/hrtimer.c [new file with mode: 0644]
kernel/time/itimer.c [new file with mode: 0644]
kernel/time/posix-cpu-timers.c [new file with mode: 0644]
kernel/time/posix-timers.c [new file with mode: 0644]
kernel/time/time.c [new file with mode: 0644]
kernel/time/timeconst.bc [new file with mode: 0644]
kernel/time/timer.c [new file with mode: 0644]
kernel/timeconst.bc [deleted file]
kernel/timer.c [deleted file]

index 1b22565c59acfb8361f32cae50c63b79bf12878f..970c4a07a9ab72a9ac5db60e4b1ca4e6263da1e3 100644 (file)
@@ -4147,7 +4147,7 @@ L:        linux-kernel@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
 S:     Maintained
 F:     Documentation/timers/
-F:     kernel/hrtimer.c
+F:     kernel/time/hrtimer.c
 F:     kernel/time/clockevents.c
 F:     kernel/time/tick*.*
 F:     kernel/time/timer_*.c
@@ -6945,10 +6945,10 @@ POSIX CLOCKS and TIMERS
 M:     Thomas Gleixner <tglx@linutronix.de>
 L:     linux-kernel@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
-S:     Supported
+S:     Maintained
 F:     fs/timerfd.c
 F:     include/linux/timer*
-F:     kernel/*timer*
+F:     kernel/time/*timer*
 
 POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS
 M:     Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
index f2a8b6246ce935e9d4ae5aeddc314e3d281c8539..973a40cf806814ef69b0a7b8d844ab5ec8392007 100644 (file)
@@ -3,12 +3,11 @@
 #
 
 obj-y     = fork.o exec_domain.o panic.o \
-           cpu.o exit.o itimer.o time.o softirq.o resource.o \
-           sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
+           cpu.o exit.o softirq.o resource.o \
+           sysctl.o sysctl_binary.o capability.o ptrace.o user.o \
            signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
-           extable.o params.o posix-timers.o \
-           kthread.o sys_ni.o posix-cpu-timers.o \
-           hrtimer.o nsproxy.o \
+           extable.o params.o \
+           kthread.o sys_ni.o nsproxy.o \
            notifier.o ksysfs.o cred.o reboot.o \
            async.o range.o groups.o smpboot.o
 
@@ -110,22 +109,6 @@ targets += config_data.h
 $(obj)/config_data.h: $(obj)/config_data.gz FORCE
        $(call filechk,ikconfiggz)
 
-$(obj)/time.o: $(obj)/timeconst.h
-
-quiet_cmd_hzfile = HZFILE  $@
-      cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@
-
-targets += hz.bc
-$(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE
-       $(call if_changed,hzfile)
-
-quiet_cmd_bc  = BC      $@
-      cmd_bc  = bc -q $(filter-out FORCE,$^) > $@
-
-targets += timeconst.h
-$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
-       $(call if_changed,bc)
-
 ###############################################################################
 #
 # Roll all the X.509 certificates that we can find together and pull them into
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
deleted file mode 100644 (file)
index 3ab2899..0000000
+++ /dev/null
@@ -1,1915 +0,0 @@
-/*
- *  linux/kernel/hrtimer.c
- *
- *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
- *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
- *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
- *
- *  High-resolution kernel timers
- *
- *  In contrast to the low-resolution timeout API implemented in
- *  kernel/timer.c, hrtimers provide finer resolution and accuracy
- *  depending on system configuration and capabilities.
- *
- *  These timers are currently used for:
- *   - itimers
- *   - POSIX timers
- *   - nanosleep
- *   - precise in-kernel timing
- *
- *  Started by: Thomas Gleixner and Ingo Molnar
- *
- *  Credits:
- *     based on kernel/timer.c
- *
- *     Help, testing, suggestions, bugfixes, improvements were
- *     provided by:
- *
- *     George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
- *     et. al.
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/cpu.h>
-#include <linux/export.h>
-#include <linux/percpu.h>
-#include <linux/hrtimer.h>
-#include <linux/notifier.h>
-#include <linux/syscalls.h>
-#include <linux/kallsyms.h>
-#include <linux/interrupt.h>
-#include <linux/tick.h>
-#include <linux/seq_file.h>
-#include <linux/err.h>
-#include <linux/debugobjects.h>
-#include <linux/sched.h>
-#include <linux/sched/sysctl.h>
-#include <linux/sched/rt.h>
-#include <linux/sched/deadline.h>
-#include <linux/timer.h>
-#include <linux/freezer.h>
-
-#include <asm/uaccess.h>
-
-#include <trace/events/timer.h>
-
-/*
- * The timer bases:
- *
- * There are more clockids than hrtimer bases. Thus, we index
- * into the timer bases by the hrtimer_base_type enum. When trying
- * to reach a base using a clockid, hrtimer_clockid_to_base()
- * is used to convert from clockid to the proper hrtimer_base_type.
- *
- * Every clock base starts out with KTIME_LOW_RES resolution;
- * hrtimer_switch_to_hres() raises it to KTIME_HIGH_RES.
- */
-DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
-{
-
-       .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
-       .clock_base =
-       {
-               {
-                       .index = HRTIMER_BASE_MONOTONIC,
-                       .clockid = CLOCK_MONOTONIC,
-                       .get_time = &ktime_get,
-                       .resolution = KTIME_LOW_RES,
-               },
-               {
-                       .index = HRTIMER_BASE_REALTIME,
-                       .clockid = CLOCK_REALTIME,
-                       .get_time = &ktime_get_real,
-                       .resolution = KTIME_LOW_RES,
-               },
-               {
-                       .index = HRTIMER_BASE_BOOTTIME,
-                       .clockid = CLOCK_BOOTTIME,
-                       .get_time = &ktime_get_boottime,
-                       .resolution = KTIME_LOW_RES,
-               },
-               {
-                       .index = HRTIMER_BASE_TAI,
-                       .clockid = CLOCK_TAI,
-                       .get_time = &ktime_get_clocktai,
-                       .resolution = KTIME_LOW_RES,
-               },
-       }
-};
-
-/*
- * Maps a clockid to the index of its hrtimer clock base. Slots for
- * clock ids without an explicit entry are zero-initialized.
- */
-static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
-       [CLOCK_REALTIME]        = HRTIMER_BASE_REALTIME,
-       [CLOCK_MONOTONIC]       = HRTIMER_BASE_MONOTONIC,
-       [CLOCK_BOOTTIME]        = HRTIMER_BASE_BOOTTIME,
-       [CLOCK_TAI]             = HRTIMER_BASE_TAI,
-};
-
-/*
- * Translate @clock_id into the index of the matching hrtimer clock base.
- * The lookup is not range checked.
- */
-static inline int hrtimer_clockid_to_base(clockid_t clock_id)
-{
-       const int base_idx = hrtimer_clock_to_base_table[clock_id];
-
-       return base_idx;
-}
-
-
-/*
- * Snapshot the coarse grained time for softirq processing. The realtime
- * value is taken from xtime; the monotonic, boottime and TAI values are
- * derived from it by adding the respective offsets.
- */
-static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
-{
-       struct timespec wall_ts, wall_to_mono, sleep_ts;
-       ktime_t realtime, monotonic, boottime, tai;
-       s32 tai_offset;
-
-       get_xtime_and_monotonic_and_sleep_offset(&wall_ts, &wall_to_mono,
-                                                &sleep_ts);
-       tai_offset = timekeeping_get_tai_offset();
-
-       realtime = timespec_to_ktime(wall_ts);
-       monotonic = ktime_add(realtime, timespec_to_ktime(wall_to_mono));
-       boottime = ktime_add(monotonic, timespec_to_ktime(sleep_ts));
-       tai = ktime_add(realtime, ktime_set(tai_offset, 0));
-
-       base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = realtime;
-       base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = monotonic;
-       base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boottime;
-       base->clock_base[HRTIMER_BASE_TAI].softirq_time = tai;
-}
-
-/*
- * Functions and macros which are different for UP/SMP systems are kept in a
- * single place
- */
-#ifdef CONFIG_SMP
-
-/*
- * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
- * means that all timers which are tied to this base via timer->base are
- * locked, and the base itself is locked too.
- *
- * So __run_timers/migrate_timers can safely modify all timers which could
- * be found on the lists/queues.
- *
- * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
- *
- * Returns the clock base of @timer with base->cpu_base->lock held and the
- * previous interrupt state saved in *@flags.
- */
-static
-struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
-                                            unsigned long *flags)
-{
-       struct hrtimer_clock_base *base;
-
-       for (;;) {
-               base = timer->base;
-               /* A NULL base means the timer is being moved: retry. */
-               if (likely(base != NULL)) {
-                       raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
-                       /* Recheck under the lock, the base may have changed */
-                       if (likely(base == timer->base))
-                               return base;
-                       /* The timer has migrated to another CPU: */
-                       raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
-               }
-               cpu_relax();
-       }
-}
-
-/*
- * With HIGHRES=y a timer must not be migrated when it would expire
- * before the next event programmed on the target cpu: the target cpu
- * hardware cannot be reprogrammed from here, so the timer would fire
- * late.
- *
- * Called with cpu_base->lock of target cpu held.
- *
- * Returns nonzero when the timer must not be moved to @new_base.
- */
-static int
-hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
-{
-#ifdef CONFIG_HIGH_RES_TIMERS
-       struct hrtimer_cpu_base *target = new_base->cpu_base;
-       ktime_t expires;
-
-       if (target->hres_active) {
-               expires = ktime_sub(hrtimer_get_expires(timer),
-                                   new_base->offset);
-               return expires.tv64 <= target->expires_next.tv64;
-       }
-#endif
-       return 0;
-}
-
-/*
- * Switch the timer base to the current CPU when possible.
- *
- * The target cpu is chosen by get_nohz_timer_target(@pinned). Entered
- * with base->cpu_base->lock held; on return the lock of the returned
- * base's cpu_base is held instead.
- *
- * Returns the clock base the timer is attached to afterwards.
- */
-static inline struct hrtimer_clock_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
-                   int pinned)
-{
-       struct hrtimer_clock_base *new_base;
-       struct hrtimer_cpu_base *new_cpu_base;
-       int this_cpu = smp_processor_id();
-       int cpu = get_nohz_timer_target(pinned);
-       int basenum = base->index;
-
-again:
-       new_cpu_base = &per_cpu(hrtimer_bases, cpu);
-       new_base = &new_cpu_base->clock_base[basenum];
-
-       if (base != new_base) {
-               /*
-                * We are trying to move timer to new_base.
-                * However we can't change timer's base while it is running,
-                * so we keep it on the same CPU. No hassle vs. reprogramming
-                * the event source in the high resolution case. The softirq
-                * code will take care of this when the timer function has
-                * completed. There is no conflict as we hold the lock until
-                * the timer is enqueued.
-                */
-               if (unlikely(hrtimer_callback_running(timer)))
-                       return base;
-
-               /* See the comment in lock_hrtimer_base() */
-               timer->base = NULL;
-               raw_spin_unlock(&base->cpu_base->lock);
-               raw_spin_lock(&new_base->cpu_base->lock);
-
-               /*
-                * The remote cpu cannot take the timer: restore the old
-                * base and retry with the current cpu as target.
-                */
-               if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
-                       cpu = this_cpu;
-                       raw_spin_unlock(&new_base->cpu_base->lock);
-                       raw_spin_lock(&base->cpu_base->lock);
-                       timer->base = base;
-                       goto again;
-               }
-               timer->base = new_base;
-       } else {
-               if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
-                       cpu = this_cpu;
-                       goto again;
-               }
-       }
-       return new_base;
-}
-
-#else /* CONFIG_SMP */
-
-/*
- * UP variant: the timer cannot change its base, so take the lock of the
- * timer's cpu base right away and hand back the clock base.
- */
-static inline struct hrtimer_clock_base *
-lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
-{
-       struct hrtimer_clock_base *clock_base;
-
-       clock_base = timer->base;
-       raw_spin_lock_irqsave(&clock_base->cpu_base->lock, *flags);
-       return clock_base;
-}
-
-/* No base switching on UP: evaluates to the base passed in */
-# define switch_hrtimer_base(t, b, p)  (b)
-
-#endif /* !CONFIG_SMP */
-
-/*
- * Functions for the union type storage format of ktime_t which are
- * too large for inlining:
- */
-#if BITS_PER_LONG < 64
-# ifndef CONFIG_KTIME_SCALAR
-/**
- * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
- * @kt:                addend
- * @nsec:      the scalar nsec value to add
- *
- * Values of @nsec below one second are added directly. Larger values are
- * split into seconds and remainder first; if the seconds part exceeds
- * KTIME_SEC_MAX, KTIME_MAX is returned.
- *
- * Returns the sum of kt and nsec in ktime_t format
- */
-ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
-{
-       ktime_t delta;
-       unsigned long rem;
-
-       if (unlikely(nsec >= NSEC_PER_SEC)) {
-               /* do_div() leaves the seconds in nsec, returns the rest */
-               rem = do_div(nsec, NSEC_PER_SEC);
-
-               /* Make sure nsec fits into long */
-               if (unlikely(nsec > KTIME_SEC_MAX))
-                       return (ktime_t){ .tv64 = KTIME_MAX };
-
-               delta = ktime_set((long)nsec, rem);
-       } else {
-               delta.tv64 = nsec;
-       }
-
-       return ktime_add(kt, delta);
-}
-
-EXPORT_SYMBOL_GPL(ktime_add_ns);
-
-/**
- * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
- * @kt:                minuend
- * @nsec:      the scalar nsec value to subtract
- *
- * Values of @nsec below one second are subtracted directly. Larger values
- * are split into seconds and remainder first.
- *
- * Returns the subtraction of @nsec from @kt in ktime_t format
- */
-ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
-{
-       ktime_t delta;
-       unsigned long rem;
-
-       if (unlikely(nsec >= NSEC_PER_SEC)) {
-               /* do_div() leaves the seconds in nsec, returns the rest */
-               rem = do_div(nsec, NSEC_PER_SEC);
-               delta = ktime_set((long)nsec, rem);
-       } else {
-               delta.tv64 = nsec;
-       }
-
-       return ktime_sub(kt, delta);
-}
-
-EXPORT_SYMBOL_GPL(ktime_sub_ns);
-# endif /* !CONFIG_KTIME_SCALAR */
-
-/*
- * Divide a ktime value by a nanosecond value.
- *
- * The divisor is shifted right until it fits into 32 bits and the
- * dividend is shifted by the same amount before the division.
- */
-u64 ktime_divns(const ktime_t kt, s64 div)
-{
-       u64 dividend = ktime_to_ns(kt);
-       int shift;
-
-       /* Make sure the divisor is less than 2^32: */
-       for (shift = 0; div >> 32; shift++)
-               div >>= 1;
-
-       dividend >>= shift;
-       do_div(dividend, (unsigned long) div);
-
-       return dividend;
-}
-#endif /* BITS_PER_LONG < 64 */
-
-/*
- * Add two ktime values with an overflow check. When the sum is negative
- * or smaller than either operand, the result is clamped.
- */
-ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
-{
-       ktime_t sum = ktime_add(lhs, rhs);
-       int overflowed;
-
-       overflowed = sum.tv64 < 0 || sum.tv64 < lhs.tv64 ||
-                    sum.tv64 < rhs.tv64;
-
-       /*
-        * We use KTIME_SEC_MAX here, the maximum timeout which we can
-        * return to user space in a timespec:
-        */
-       if (overflowed)
-               return ktime_set(KTIME_SEC_MAX, 0);
-
-       return sum;
-}
-
-EXPORT_SYMBOL_GPL(ktime_add_safe);
-
-#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-
-static struct debug_obj_descr hrtimer_debug_descr;
-
-/* debugobjects hint: report the callback function of the hrtimer at @addr */
-static void *hrtimer_debug_hint(void *addr)
-{
-       struct hrtimer *timer = addr;
-
-       return timer->function;
-}
-
-/*
- * fixup_init is called when:
- * - an active object is initialized
- *
- * An active timer is cancelled and initialized again. Returns 1 when the
- * fixup was applied, 0 otherwise.
- */
-static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
-{
-       struct hrtimer *timer = addr;
-
-       if (state != ODEBUG_STATE_ACTIVE)
-               return 0;
-
-       hrtimer_cancel(timer);
-       debug_object_init(timer, &hrtimer_debug_descr);
-       return 1;
-}
-
-/*
- * fixup_activate is called when:
- * - an active object is activated
- * - an unknown object is activated (might be a statically initialized object)
- *
- * Nothing is repaired here: both cases only emit a warning (once for the
- * unknown object case) and 0 is returned for every state.
- */
-static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
-{
-       if (state == ODEBUG_STATE_NOTAVAILABLE)
-               WARN_ON_ONCE(1);
-       else if (state == ODEBUG_STATE_ACTIVE)
-               WARN_ON(1);
-
-       return 0;
-}
-
-/*
- * fixup_free is called when:
- * - an active object is freed
- *
- * An active timer is cancelled before its debug object is freed. Returns
- * 1 when the fixup was applied, 0 otherwise.
- */
-static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
-{
-       struct hrtimer *timer = addr;
-
-       if (state != ODEBUG_STATE_ACTIVE)
-               return 0;
-
-       hrtimer_cancel(timer);
-       debug_object_free(timer, &hrtimer_debug_descr);
-       return 1;
-}
-
-/* debugobjects descriptor wiring up the hrtimer hint and fixup callbacks */
-static struct debug_obj_descr hrtimer_debug_descr = {
-       .name           = "hrtimer",
-       .debug_hint     = hrtimer_debug_hint,
-       .fixup_init     = hrtimer_fixup_init,
-       .fixup_activate = hrtimer_fixup_activate,
-       .fixup_free     = hrtimer_fixup_free,
-};
-
-/*
- * Thin wrappers which forward the hrtimer state transitions (init,
- * activate, deactivate, free) to debugobjects using hrtimer_debug_descr.
- */
-static inline void debug_hrtimer_init(struct hrtimer *timer)
-{
-       debug_object_init(timer, &hrtimer_debug_descr);
-}
-
-static inline void debug_hrtimer_activate(struct hrtimer *timer)
-{
-       debug_object_activate(timer, &hrtimer_debug_descr);
-}
-
-static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
-{
-       debug_object_deactivate(timer, &hrtimer_debug_descr);
-}
-
-static inline void debug_hrtimer_free(struct hrtimer *timer)
-{
-       debug_object_free(timer, &hrtimer_debug_descr);
-}
-
-static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
-                          enum hrtimer_mode mode);
-
-/*
- * Initialize an hrtimer which lives on the stack: register it with
- * debugobjects as an on-stack object, then run the regular
- * initialization via __hrtimer_init().
- */
-void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
-                          enum hrtimer_mode mode)
-{
-       debug_object_init_on_stack(timer, &hrtimer_debug_descr);
-       __hrtimer_init(timer, clock_id, mode);
-}
-EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
-
-/* Release the debugobjects tracking of an on-stack hrtimer */
-void destroy_hrtimer_on_stack(struct hrtimer *timer)
-{
-       debug_object_free(timer, &hrtimer_debug_descr);
-}
-
-#else
-/* !CONFIG_DEBUG_OBJECTS_TIMERS: the debugobjects hooks are empty stubs */
-static inline void debug_hrtimer_init(struct hrtimer *timer) { }
-static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
-static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
-#endif
-
-/*
- * Combined debug hooks: each one notifies debugobjects first and then
- * emits the matching hrtimer tracepoint.
- */
-static inline void
-debug_init(struct hrtimer *timer, clockid_t clockid,
-          enum hrtimer_mode mode)
-{
-       debug_hrtimer_init(timer);
-       trace_hrtimer_init(timer, clockid, mode);
-}
-
-static inline void debug_activate(struct hrtimer *timer)
-{
-       debug_hrtimer_activate(timer);
-       trace_hrtimer_start(timer);
-}
-
-static inline void debug_deactivate(struct hrtimer *timer)
-{
-       debug_hrtimer_deactivate(timer);
-       trace_hrtimer_cancel(timer);
-}
-
-/* High resolution timer related functions */
-#ifdef CONFIG_HIGH_RES_TIMERS
-
-/*
- * High resolution timer enabled ?
- */
-static int hrtimer_hres_enabled __read_mostly  = 1;
-
-/*
- * Parse the "highres=" boot parameter: "off" disables and "on" enables
- * high resolution mode. Returns 1 when the value was recognized and 0
- * for anything else.
- */
-static int __init setup_hrtimer_hres(char *str)
-{
-       if (strcmp(str, "off") == 0) {
-               hrtimer_hres_enabled = 0;
-               return 1;
-       }
-
-       if (strcmp(str, "on") == 0) {
-               hrtimer_hres_enabled = 1;
-               return 1;
-       }
-
-       return 0;
-}
-
-__setup("highres=", setup_hrtimer_hres);
-
-/*
- * hrtimer_is_hres_enabled - query, if the highres mode is enabled
- *
- * Reports the hrtimer_hres_enabled setting, not whether this CPU has
- * actually switched to high resolution mode.
- */
-static inline int hrtimer_is_hres_enabled(void)
-{
-       return hrtimer_hres_enabled;
-}
-
-/*
- * Is the high resolution mode active ?
- *
- * Reads the hres_active flag of the current CPU's hrtimer base.
- */
-static inline int hrtimer_hres_active(void)
-{
-       return __this_cpu_read(hrtimer_bases.hres_active);
-}
-
-/*
- * Reprogram the event source with checking both queues for the
- * next event
- * Called with interrupts disabled and base->lock held
- *
- * @skip_equal: when nonzero, return without touching anything if the
- *              newly computed expiry equals cpu_base->expires_next
- */
-static void
-hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
-{
-       int i;
-       struct hrtimer_clock_base *base = cpu_base->clock_base;
-       ktime_t expires, expires_next;
-
-       expires_next.tv64 = KTIME_MAX;
-
-       /* Find the earliest expiry over the first timer of each clock base */
-       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
-               struct hrtimer *timer;
-               struct timerqueue_node *next;
-
-               next = timerqueue_getnext(&base->active);
-               if (!next)
-                       continue;
-               timer = container_of(next, struct hrtimer, node);
-
-               expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-               /*
-                * clock_was_set() has changed base->offset so the
-                * result might be negative. Fix it up to prevent a
-                * false positive in clockevents_program_event()
-                */
-               if (expires.tv64 < 0)
-                       expires.tv64 = 0;
-               if (expires.tv64 < expires_next.tv64)
-                       expires_next = expires;
-       }
-
-       if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
-               return;
-
-       cpu_base->expires_next.tv64 = expires_next.tv64;
-
-       /*
-        * If a hang was detected in the last timer interrupt then we
-        * leave the hang delay active in the hardware. We want the
-        * system to make progress. That also prevents the following
-        * scenario:
-        * T1 expires 50ms from now
-        * T2 expires 5s from now
-        *
-        * T1 is removed, so this code is called and would reprogram
-        * the hardware to 5s from now. Any hrtimer_start after that
-        * will not reprogram the hardware due to hang_detected being
-        * set. So we'd effectively block all timers until the T2 event
-        * fires.
-        */
-       if (cpu_base->hang_detected)
-               return;
-
-       /* KTIME_MAX means no timer is queued: nothing to program */
-       if (cpu_base->expires_next.tv64 != KTIME_MAX)
-               tick_program_event(cpu_base->expires_next, 1);
-}
-
-/*
- * Shared reprogramming for clock_realtime and clock_monotonic
- *
- * When a timer is enqueued and expires earlier than the already enqueued
- * timers, we have to check, whether it expires earlier than the timer for
- * which the clock event device was armed.
- *
- * Called with interrupts disabled and base->cpu_base.lock held
- *
- * Returns 0 when no reprogramming was necessary, -ETIME when the expiry
- * minus base->offset is negative, otherwise the result of
- * tick_program_event().
- */
-static int hrtimer_reprogram(struct hrtimer *timer,
-                            struct hrtimer_clock_base *base)
-{
-       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-       ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-       int res;
-
-       WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
-
-       /*
-        * When the callback is running, we do not reprogram the clock event
-        * device. The timer callback is either running on a different CPU or
-        * the callback is executed in the hrtimer_interrupt context. The
-        * reprogramming is handled either by the softirq, which called the
-        * callback or at the end of the hrtimer_interrupt.
-        */
-       if (hrtimer_callback_running(timer))
-               return 0;
-
-       /*
-        * CLOCK_REALTIME timer might be requested with an absolute
-        * expiry time which is less than base->offset. Nothing wrong
-        * about that, just avoid to call into the tick code, which
-        * has now objections against negative expiry values.
-        */
-       if (expires.tv64 < 0)
-               return -ETIME;
-
-       /* Not earlier than the currently programmed event: nothing to do */
-       if (expires.tv64 >= cpu_base->expires_next.tv64)
-               return 0;
-
-       /*
-        * If a hang was detected in the last timer interrupt then we
-        * do not schedule a timer which is earlier than the expiry
-        * which we enforced in the hang detection. We want the system
-        * to make progress.
-        */
-       if (cpu_base->hang_detected)
-               return 0;
-
-       /*
-        * Clockevents returns -ETIME, when the event was in the past.
-        */
-       res = tick_program_event(expires, 0);
-       /* Only record the new expiry when programming succeeded */
-       if (!IS_ERR_VALUE(res))
-               cpu_base->expires_next = expires;
-       return res;
-}
-
-/*
- * Initialize the high resolution related parts of cpu_base:
- * no event pending (expires_next = KTIME_MAX) and highres mode inactive.
- */
-static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
-{
-       base->expires_next.tv64 = KTIME_MAX;
-       base->hres_active = 0;
-}
-
-/*
- * Try to reprogram when high resolution timers are active. With
- * HRTIMER_STATE_CALLBACK set, neither reprogramming nor an expiry check
- * happens here: the timer just gets enqueued into the rbtree and both
- * are done in the hrtimer_interrupt or in the softirq.
- *
- * Returns 1 when high resolution mode is active and hrtimer_reprogram()
- * returned a nonzero value, 0 otherwise.
- */
-static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
-                                           struct hrtimer_clock_base *base)
-{
-       if (!base->cpu_base->hres_active)
-               return 0;
-
-       return hrtimer_reprogram(timer, base) != 0;
-}
-
-/*
- * Refresh the offsets of the realtime, boottime and TAI clock bases of
- * @base via ktime_get_update_offsets() and hand back its return value.
- */
-static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
-{
-       struct hrtimer_clock_base *bases = base->clock_base;
-
-       return ktime_get_update_offsets(&bases[HRTIMER_BASE_REALTIME].offset,
-                                       &bases[HRTIMER_BASE_BOOTTIME].offset,
-                                       &bases[HRTIMER_BASE_TAI].offset);
-}
-
-/*
- * Retrigger next event is called after clock was set
- *
- * Called with interrupts disabled via on_each_cpu()
- *
- * Does nothing unless high resolution mode is active on this CPU.
- * @arg is unused.
- */
-static void retrigger_next_event(void *arg)
-{
-       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
-       if (hrtimer_hres_active()) {
-               raw_spin_lock(&cpu_base->lock);
-               hrtimer_update_base(cpu_base);
-               hrtimer_force_reprogram(cpu_base, 0);
-               raw_spin_unlock(&cpu_base->lock);
-       }
-}
-
-/*
- * Switch the current CPU to high resolution mode.
- *
- * Returns 1 when the mode is (or already was) active and 0 when
- * tick_init_highres() failed.
- */
-static int hrtimer_switch_to_hres(void)
-{
-       int cpu = smp_processor_id();
-       struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
-       unsigned long flags;
-       int idx, switched = 0;
-
-       if (cpu_base->hres_active)
-               return 1;
-
-       local_irq_save(flags);
-
-       if (!tick_init_highres()) {
-               cpu_base->hres_active = 1;
-               for (idx = 0; idx < HRTIMER_MAX_CLOCK_BASES; idx++)
-                       cpu_base->clock_base[idx].resolution = KTIME_HIGH_RES;
-
-               tick_setup_sched_timer();
-               /* "Retrigger" the interrupt to get things going */
-               retrigger_next_event(NULL);
-               switched = 1;
-       }
-
-       local_irq_restore(flags);
-
-       if (!switched)
-               printk(KERN_WARNING "Could not switch to high resolution "
-                                   "mode on CPU %d\n", cpu);
-
-       return switched;
-}
-
-/* Work callback: runs clock_was_set() from the workqueue */
-static void clock_was_set_work(struct work_struct *work)
-{
-       clock_was_set();
-}
-
-static DECLARE_WORK(hrtimer_work, clock_was_set_work);
-
-/*
- * Called from timekeeping and resume code to reprogram the hrtimer
- * interrupt device on all cpus.
- *
- * The clock_was_set() call is deferred by scheduling hrtimer_work.
- */
-void clock_was_set_delayed(void)
-{
-       schedule_work(&hrtimer_work);
-}
-
-#else
-
-/*
- * !CONFIG_HIGH_RES_TIMERS: high resolution mode is never enabled or
- * active and the reprogramming hooks do nothing.
- */
-static inline int hrtimer_hres_active(void) { return 0; }
-static inline int hrtimer_is_hres_enabled(void) { return 0; }
-static inline int hrtimer_switch_to_hres(void) { return 0; }
-static inline void
-hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
-static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
-                                           struct hrtimer_clock_base *base)
-{
-       return 0;
-}
-static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
-static inline void retrigger_next_event(void *arg) { }
-
-#endif /* CONFIG_HIGH_RES_TIMERS */
-
-/*
- * Clock realtime was set
- *
- * Change the offset of the realtime clock vs. the monotonic
- * clock.
- *
- * We might have to reprogram the high resolution timer interrupt. On
- * SMP we call the architecture specific code to retrigger _all_ high
- * resolution timer interrupts. On UP we just disable interrupts and
- * call the high resolution interrupt code.
- *
- * timerfd_clock_was_set() is called independent of
- * CONFIG_HIGH_RES_TIMERS.
- */
-void clock_was_set(void)
-{
-#ifdef CONFIG_HIGH_RES_TIMERS
-       /* Retrigger the CPU local events everywhere */
-       on_each_cpu(retrigger_next_event, NULL, 1);
-#endif
-       timerfd_clock_was_set();
-}
-
-/*
- * During resume we might have to reprogram the high resolution timer
- * interrupt on all online CPUs.  However, all other CPUs will be
- * stopped with interrupts disabled so the clock_was_set() call
- * must be deferred.
- *
- * Expected to be called with interrupts disabled; a one-time warning
- * is emitted otherwise.
- */
-void hrtimers_resume(void)
-{
-       /*
-        * No KERN_INFO prefix in the format: this is a warning, and the
-        * message has to terminate its own line.
-        */
-       WARN_ONCE(!irqs_disabled(),
-                 "hrtimers_resume() called with IRQs enabled!\n");
-
-       /* Retrigger on the local CPU */
-       retrigger_next_event(NULL);
-       /* And schedule a retrigger for all others */
-       clock_was_set_delayed();
-}
-
-/*
- * Record who started @timer for the timer statistics: the return
- * address, the comm and the pid of the current task. Already recorded
- * start info is left untouched. Empty without CONFIG_TIMER_STATS.
- */
-static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
-       if (!timer->start_site) {
-               timer->start_site = __builtin_return_address(0);
-               memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
-               timer->start_pid = current->pid;
-       }
-#endif
-}
-
-/*
- * Forget the recorded start site of @timer so that the next
- * timer_stats_hrtimer_set_start_info() call records it anew.
- * Empty without CONFIG_TIMER_STATS.
- */
-static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
-       timer->start_site = NULL;
-#endif
-}
-
-/*
- * Feed the recorded start info of @timer into the timer statistics,
- * but only while statistics collection is active. Empty without
- * CONFIG_TIMER_STATS.
- */
-static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
-       if (unlikely(timer_stats_active))
-               timer_stats_update_stats(timer, timer->start_pid,
-                                        timer->start_site, timer->function,
-                                        timer->start_comm, 0);
-#endif
-}
-
-/*
- * Counterpart to lock_hrtimer_base above:
- * drops the lock of the timer's current cpu base and restores the
- * interrupt state saved in *@flags.
- */
-static inline
-void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
-{
-       raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
-}
-
-/**
- * hrtimer_forward - forward the timer expiry
- * @timer:     hrtimer to forward
- * @now:       forward past this time
- * @interval:  the interval to forward
- *
- * Forward the timer expiry so it will expire in the future.
- * Returns the number of overruns.
- *
- * Returns 0 without touching the timer when it has not expired yet
- * relative to @now.
- */
-u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
-{
-       u64 orun = 1;
-       ktime_t delta;
-
-       delta = ktime_sub(now, hrtimer_get_expires(timer));
-
-       if (delta.tv64 < 0)
-               return 0;
-
-       /* Never forward in steps smaller than the clock base resolution */
-       if (interval.tv64 < timer->base->resolution.tv64)
-               interval.tv64 = timer->base->resolution.tv64;
-
-       if (unlikely(delta.tv64 >= interval.tv64)) {
-               s64 incr = ktime_to_ns(interval);
-
-               /* Skip all the whole intervals that were missed at once */
-               orun = ktime_divns(delta, incr);
-               hrtimer_add_expires_ns(timer, incr * orun);
-               if (hrtimer_get_expires_tv64(timer) > now.tv64)
-                       return orun;
-               /*
-                * This (and the hrtimer_add_expires() below) is the
-                * correction for exact:
-                */
-               orun++;
-       }
-       hrtimer_add_expires(timer, interval);
-
-       return orun;
-}
-EXPORT_SYMBOL_GPL(hrtimer_forward);
-
-/*
- * enqueue_hrtimer - internal function to (re)start a timer
- *
- * The timer is inserted in expiry order. Insertion into the
- * red black tree is O(log(n)). Must hold the base lock.
- *
- * Returns 1 when the new timer is the leftmost timer in the tree.
- */
-static int enqueue_hrtimer(struct hrtimer *timer,
-                          struct hrtimer_clock_base *base)
-{
-       debug_activate(timer);
-
-       timerqueue_add(&base->active, &timer->node);
-       /* Flag this clock base as having queued timers */
-       base->cpu_base->active_bases |= 1 << base->index;
-
-       /*
-        * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
-        * state of a possibly running callback.
-        */
-       timer->state |= HRTIMER_STATE_ENQUEUED;
-
-       return (&timer->node == base->active.next);
-}
-
-/*
- * __remove_hrtimer - internal function to remove a timer
- *
- * Caller must hold the base lock.
- *
- * High resolution timer mode reprograms the clock event device when the
- * timer is the one which expires next. The caller can disable this by setting
- * reprogram to zero. This is useful, when the context does a reprogramming
- * anyway (e.g. timer interrupt)
- *
- * timer->state is set to @newstate in all cases, also when the timer
- * was not enqueued.
- */
-static void __remove_hrtimer(struct hrtimer *timer,
-                            struct hrtimer_clock_base *base,
-                            unsigned long newstate, int reprogram)
-{
-       struct timerqueue_node *next_timer;
-       if (!(timer->state & HRTIMER_STATE_ENQUEUED))
-               goto out;
-
-       /* Remember the first expiring timer before the removal */
-       next_timer = timerqueue_getnext(&base->active);
-       timerqueue_del(&base->active, &timer->node);
-       if (&timer->node == next_timer) {
-#ifdef CONFIG_HIGH_RES_TIMERS
-               /* Reprogram the clock event device, if enabled */
-               if (reprogram && hrtimer_hres_active()) {
-                       ktime_t expires;
-
-                       expires = ktime_sub(hrtimer_get_expires(timer),
-                                           base->offset);
-                       if (base->cpu_base->expires_next.tv64 == expires.tv64)
-                               hrtimer_force_reprogram(base->cpu_base, 1);
-               }
-#endif
-       }
-       /* The last timer is gone: clear this base in the active mask */
-       if (!timerqueue_getnext(&base->active))
-               base->cpu_base->active_bases &= ~(1 << base->index);
-out:
-       timer->state = newstate;
-}
-
-/*
- * remove hrtimer, called with base lock held
- */
-static inline int
-remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
-{
-       if (hrtimer_is_queued(timer)) {
-               unsigned long state;
-               int reprogram;
-
-               /*
-                * Remove the timer and force reprogramming when high
-                * resolution mode is active and the timer is on the current
-                * CPU. If we remove a timer on another CPU, reprogramming is
-                * skipped. The interrupt event on this CPU is fired and
-                * reprogramming happens in the interrupt handler. This is a
-                * rare case and less expensive than a smp call.
-                */
-               debug_deactivate(timer);
-               timer_stats_hrtimer_clear_start_info(timer);
-               reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
-               /*
-                * We must preserve the CALLBACK state flag here,
-                * otherwise we could move the timer base in
-                * switch_hrtimer_base.
-                */
-               state = timer->state & HRTIMER_STATE_CALLBACK;
-               __remove_hrtimer(timer, base, state, reprogram);
-               return 1;
-       }
-       return 0;
-}
-
-int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
-               unsigned long delta_ns, const enum hrtimer_mode mode,
-               int wakeup)
-{
-       struct hrtimer_clock_base *base, *new_base;
-       unsigned long flags;
-       int ret, leftmost;
-
-       base = lock_hrtimer_base(timer, &flags);
-
-       /* Remove an active timer from the queue: */
-       ret = remove_hrtimer(timer, base);
-
-       if (mode & HRTIMER_MODE_REL) {
-               tim = ktime_add_safe(tim, base->get_time());
-               /*
-                * CONFIG_TIME_LOW_RES is a temporary way for architectures
-                * to signal that they simply return xtime in
-                * do_gettimeoffset(). In this case we want to round up by
-                * resolution when starting a relative timer, to avoid short
-                * timeouts. This will go away with the GTOD framework.
-                */
-#ifdef CONFIG_TIME_LOW_RES
-               tim = ktime_add_safe(tim, base->resolution);
-#endif
-       }
-
-       hrtimer_set_expires_range_ns(timer, tim, delta_ns);
-
-       /* Switch the timer base, if necessary: */
-       new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
-
-       timer_stats_hrtimer_set_start_info(timer);
-
-       leftmost = enqueue_hrtimer(timer, new_base);
-
-       /*
-        * Only allow reprogramming if the new base is on this CPU.
-        * (it might still be on another CPU if the timer was pending)
-        *
-        * XXX send_remote_softirq() ?
-        */
-       if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
-               && hrtimer_enqueue_reprogram(timer, new_base)) {
-               if (wakeup) {
-                       /*
-                        * We need to drop cpu_base->lock to avoid a
-                        * lock ordering issue vs. rq->lock.
-                        */
-                       raw_spin_unlock(&new_base->cpu_base->lock);
-                       raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-                       local_irq_restore(flags);
-                       return ret;
-               } else {
-                       __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-               }
-       }
-
-       unlock_hrtimer_base(timer, &flags);
-
-       return ret;
-}
-EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
-
-/**
- * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
- * @timer:     the timer to be added
- * @tim:       expiry time
- * @delta_ns:  "slack" range for the timer
- * @mode:      expiry mode: absolute (HRTIMER_MODE_ABS) or
- *             relative (HRTIMER_MODE_REL)
- *
- * Returns:
- *  0 on success
- *  1 when the timer was active
- */
-int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
-               unsigned long delta_ns, const enum hrtimer_mode mode)
-{
-       return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
-}
-EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
-
-/**
- * hrtimer_start - (re)start an hrtimer on the current CPU
- * @timer:     the timer to be added
- * @tim:       expiry time
- * @mode:      expiry mode: absolute (HRTIMER_MODE_ABS) or
- *             relative (HRTIMER_MODE_REL)
- *
- * Returns:
- *  0 on success
- *  1 when the timer was active
- */
-int
-hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
-{
-       return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
-}
-EXPORT_SYMBOL_GPL(hrtimer_start);
-
-
-/**
- * hrtimer_try_to_cancel - try to deactivate a timer
- * @timer:     hrtimer to stop
- *
- * Returns:
- *  0 when the timer was not active
- *  1 when the timer was active
- * -1 when the timer is currently excuting the callback function and
- *    cannot be stopped
- */
-int hrtimer_try_to_cancel(struct hrtimer *timer)
-{
-       struct hrtimer_clock_base *base;
-       unsigned long flags;
-       int ret = -1;
-
-       base = lock_hrtimer_base(timer, &flags);
-
-       if (!hrtimer_callback_running(timer))
-               ret = remove_hrtimer(timer, base);
-
-       unlock_hrtimer_base(timer, &flags);
-
-       return ret;
-
-}
-EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
-
-/**
- * hrtimer_cancel - cancel a timer and wait for the handler to finish.
- * @timer:     the timer to be cancelled
- *
- * Returns:
- *  0 when the timer was not active
- *  1 when the timer was active
- */
-int hrtimer_cancel(struct hrtimer *timer)
-{
-       for (;;) {
-               int ret = hrtimer_try_to_cancel(timer);
-
-               if (ret >= 0)
-                       return ret;
-               cpu_relax();
-       }
-}
-EXPORT_SYMBOL_GPL(hrtimer_cancel);
-
-/**
- * hrtimer_get_remaining - get remaining time for the timer
- * @timer:     the timer to read
- */
-ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
-{
-       unsigned long flags;
-       ktime_t rem;
-
-       lock_hrtimer_base(timer, &flags);
-       rem = hrtimer_expires_remaining(timer);
-       unlock_hrtimer_base(timer, &flags);
-
-       return rem;
-}
-EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
-
-#ifdef CONFIG_NO_HZ_COMMON
-/**
- * hrtimer_get_next_event - get the time until next expiry event
- *
- * Returns the delta to the next expiry event or KTIME_MAX if no timer
- * is pending.
- */
-ktime_t hrtimer_get_next_event(void)
-{
-       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-       struct hrtimer_clock_base *base = cpu_base->clock_base;
-       ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
-       unsigned long flags;
-       int i;
-
-       raw_spin_lock_irqsave(&cpu_base->lock, flags);
-
-       if (!hrtimer_hres_active()) {
-               for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
-                       struct hrtimer *timer;
-                       struct timerqueue_node *next;
-
-                       next = timerqueue_getnext(&base->active);
-                       if (!next)
-                               continue;
-
-                       timer = container_of(next, struct hrtimer, node);
-                       delta.tv64 = hrtimer_get_expires_tv64(timer);
-                       delta = ktime_sub(delta, base->get_time());
-                       if (delta.tv64 < mindelta.tv64)
-                               mindelta.tv64 = delta.tv64;
-               }
-       }
-
-       raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
-
-       if (mindelta.tv64 < 0)
-               mindelta.tv64 = 0;
-       return mindelta;
-}
-#endif
-
-static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
-                          enum hrtimer_mode mode)
-{
-       struct hrtimer_cpu_base *cpu_base;
-       int base;
-
-       memset(timer, 0, sizeof(struct hrtimer));
-
-       cpu_base = &__raw_get_cpu_var(hrtimer_bases);
-
-       if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
-               clock_id = CLOCK_MONOTONIC;
-
-       base = hrtimer_clockid_to_base(clock_id);
-       timer->base = &cpu_base->clock_base[base];
-       timerqueue_init(&timer->node);
-
-#ifdef CONFIG_TIMER_STATS
-       timer->start_site = NULL;
-       timer->start_pid = -1;
-       memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
-}
-
-/**
- * hrtimer_init - initialize a timer to the given clock
- * @timer:     the timer to be initialized
- * @clock_id:  the clock to be used
- * @mode:      timer mode abs/rel
- */
-void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
-                 enum hrtimer_mode mode)
-{
-       debug_init(timer, clock_id, mode);
-       __hrtimer_init(timer, clock_id, mode);
-}
-EXPORT_SYMBOL_GPL(hrtimer_init);
-
-/**
- * hrtimer_get_res - get the timer resolution for a clock
- * @which_clock: which clock to query
- * @tp:                 pointer to timespec variable to store the resolution
- *
- * Store the resolution of the clock selected by @which_clock in the
- * variable pointed to by @tp.
- */
-int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
-{
-       struct hrtimer_cpu_base *cpu_base;
-       int base = hrtimer_clockid_to_base(which_clock);
-
-       cpu_base = &__raw_get_cpu_var(hrtimer_bases);
-       *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(hrtimer_get_res);
-
-static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
-{
-       struct hrtimer_clock_base *base = timer->base;
-       struct hrtimer_cpu_base *cpu_base = base->cpu_base;
-       enum hrtimer_restart (*fn)(struct hrtimer *);
-       int restart;
-
-       WARN_ON(!irqs_disabled());
-
-       debug_deactivate(timer);
-       __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
-       timer_stats_account_hrtimer(timer);
-       fn = timer->function;
-
-       /*
-        * Because we run timers from hardirq context, there is no chance
-        * they get migrated to another cpu, therefore its safe to unlock
-        * the timer base.
-        */
-       raw_spin_unlock(&cpu_base->lock);
-       trace_hrtimer_expire_entry(timer, now);
-       restart = fn(timer);
-       trace_hrtimer_expire_exit(timer);
-       raw_spin_lock(&cpu_base->lock);
-
-       /*
-        * Note: We clear the CALLBACK bit after enqueue_hrtimer and
-        * we do not reprogramm the event hardware. Happens either in
-        * hrtimer_start_range_ns() or in hrtimer_interrupt()
-        */
-       if (restart != HRTIMER_NORESTART) {
-               BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
-               enqueue_hrtimer(timer, base);
-       }
-
-       WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
-
-       timer->state &= ~HRTIMER_STATE_CALLBACK;
-}
-
-#ifdef CONFIG_HIGH_RES_TIMERS
-
-/*
- * High resolution timer interrupt
- * Called with interrupts disabled
- */
-void hrtimer_interrupt(struct clock_event_device *dev)
-{
-       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-       ktime_t expires_next, now, entry_time, delta;
-       int i, retries = 0;
-
-       BUG_ON(!cpu_base->hres_active);
-       cpu_base->nr_events++;
-       dev->next_event.tv64 = KTIME_MAX;
-
-       raw_spin_lock(&cpu_base->lock);
-       entry_time = now = hrtimer_update_base(cpu_base);
-retry:
-       expires_next.tv64 = KTIME_MAX;
-       /*
-        * We set expires_next to KTIME_MAX here with cpu_base->lock
-        * held to prevent that a timer is enqueued in our queue via
-        * the migration code. This does not affect enqueueing of
-        * timers which run their callback and need to be requeued on
-        * this CPU.
-        */
-       cpu_base->expires_next.tv64 = KTIME_MAX;
-
-       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-               struct hrtimer_clock_base *base;
-               struct timerqueue_node *node;
-               ktime_t basenow;
-
-               if (!(cpu_base->active_bases & (1 << i)))
-                       continue;
-
-               base = cpu_base->clock_base + i;
-               basenow = ktime_add(now, base->offset);
-
-               while ((node = timerqueue_getnext(&base->active))) {
-                       struct hrtimer *timer;
-
-                       timer = container_of(node, struct hrtimer, node);
-
-                       /*
-                        * The immediate goal for using the softexpires is
-                        * minimizing wakeups, not running timers at the
-                        * earliest interrupt after their soft expiration.
-                        * This allows us to avoid using a Priority Search
-                        * Tree, which can answer a stabbing querry for
-                        * overlapping intervals and instead use the simple
-                        * BST we already have.
-                        * We don't add extra wakeups by delaying timers that
-                        * are right-of a not yet expired timer, because that
-                        * timer will have to trigger a wakeup anyway.
-                        */
-
-                       if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
-                               ktime_t expires;
-
-                               expires = ktime_sub(hrtimer_get_expires(timer),
-                                                   base->offset);
-                               if (expires.tv64 < 0)
-                                       expires.tv64 = KTIME_MAX;
-                               if (expires.tv64 < expires_next.tv64)
-                                       expires_next = expires;
-                               break;
-                       }
-
-                       __run_hrtimer(timer, &basenow);
-               }
-       }
-
-       /*
-        * Store the new expiry value so the migration code can verify
-        * against it.
-        */
-       cpu_base->expires_next = expires_next;
-       raw_spin_unlock(&cpu_base->lock);
-
-       /* Reprogramming necessary ? */
-       if (expires_next.tv64 == KTIME_MAX ||
-           !tick_program_event(expires_next, 0)) {
-               cpu_base->hang_detected = 0;
-               return;
-       }
-
-       /*
-        * The next timer was already expired due to:
-        * - tracing
-        * - long lasting callbacks
-        * - being scheduled away when running in a VM
-        *
-        * We need to prevent that we loop forever in the hrtimer
-        * interrupt routine. We give it 3 attempts to avoid
-        * overreacting on some spurious event.
-        *
-        * Acquire base lock for updating the offsets and retrieving
-        * the current time.
-        */
-       raw_spin_lock(&cpu_base->lock);
-       now = hrtimer_update_base(cpu_base);
-       cpu_base->nr_retries++;
-       if (++retries < 3)
-               goto retry;
-       /*
-        * Give the system a chance to do something else than looping
-        * here. We stored the entry time, so we know exactly how long
-        * we spent here. We schedule the next event this amount of
-        * time away.
-        */
-       cpu_base->nr_hangs++;
-       cpu_base->hang_detected = 1;
-       raw_spin_unlock(&cpu_base->lock);
-       delta = ktime_sub(now, entry_time);
-       if (delta.tv64 > cpu_base->max_hang_time.tv64)
-               cpu_base->max_hang_time = delta;
-       /*
-        * Limit it to a sensible value as we enforce a longer
-        * delay. Give the CPU at least 100ms to catch up.
-        */
-       if (delta.tv64 > 100 * NSEC_PER_MSEC)
-               expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
-       else
-               expires_next = ktime_add(now, delta);
-       tick_program_event(expires_next, 1);
-       printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
-                   ktime_to_ns(delta));
-}
-
-/*
- * local version of hrtimer_peek_ahead_timers() called with interrupts
- * disabled.
- */
-static void __hrtimer_peek_ahead_timers(void)
-{
-       struct tick_device *td;
-
-       if (!hrtimer_hres_active())
-               return;
-
-       td = &__get_cpu_var(tick_cpu_device);
-       if (td && td->evtdev)
-               hrtimer_interrupt(td->evtdev);
-}
-
-/**
- * hrtimer_peek_ahead_timers -- run soft-expired timers now
- *
- * hrtimer_peek_ahead_timers will peek at the timer queue of
- * the current cpu and check if there are any timers for which
- * the soft expires time has passed. If any such timers exist,
- * they are run immediately and then removed from the timer queue.
- *
- */
-void hrtimer_peek_ahead_timers(void)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __hrtimer_peek_ahead_timers();
-       local_irq_restore(flags);
-}
-
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
-       hrtimer_peek_ahead_timers();
-}
-
-#else /* CONFIG_HIGH_RES_TIMERS */
-
-static inline void __hrtimer_peek_ahead_timers(void) { }
-
-#endif /* !CONFIG_HIGH_RES_TIMERS */
-
-/*
- * Called from timer softirq every jiffy, expire hrtimers:
- *
- * For HRT its the fall back code to run the softirq in the timer
- * softirq context in case the hrtimer initialization failed or has
- * not been done yet.
- */
-void hrtimer_run_pending(void)
-{
-       if (hrtimer_hres_active())
-               return;
-
-       /*
-        * This _is_ ugly: We have to check in the softirq context,
-        * whether we can switch to highres and / or nohz mode. The
-        * clocksource switch happens in the timer interrupt with
-        * xtime_lock held. Notification from there only sets the
-        * check bit in the tick_oneshot code, otherwise we might
-        * deadlock vs. xtime_lock.
-        */
-       if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
-               hrtimer_switch_to_hres();
-}
-
-/*
- * Called from hardirq context every jiffy
- */
-void hrtimer_run_queues(void)
-{
-       struct timerqueue_node *node;
-       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-       struct hrtimer_clock_base *base;
-       int index, gettime = 1;
-
-       if (hrtimer_hres_active())
-               return;
-
-       for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
-               base = &cpu_base->clock_base[index];
-               if (!timerqueue_getnext(&base->active))
-                       continue;
-
-               if (gettime) {
-                       hrtimer_get_softirq_time(cpu_base);
-                       gettime = 0;
-               }
-
-               raw_spin_lock(&cpu_base->lock);
-
-               while ((node = timerqueue_getnext(&base->active))) {
-                       struct hrtimer *timer;
-
-                       timer = container_of(node, struct hrtimer, node);
-                       if (base->softirq_time.tv64 <=
-                                       hrtimer_get_expires_tv64(timer))
-                               break;
-
-                       __run_hrtimer(timer, &base->softirq_time);
-               }
-               raw_spin_unlock(&cpu_base->lock);
-       }
-}
-
-/*
- * Sleep related functions:
- */
-static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
-{
-       struct hrtimer_sleeper *t =
-               container_of(timer, struct hrtimer_sleeper, timer);
-       struct task_struct *task = t->task;
-
-       t->task = NULL;
-       if (task)
-               wake_up_process(task);
-
-       return HRTIMER_NORESTART;
-}
-
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
-{
-       sl->timer.function = hrtimer_wakeup;
-       sl->task = task;
-}
-EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
-
-static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
-{
-       hrtimer_init_sleeper(t, current);
-
-       do {
-               set_current_state(TASK_INTERRUPTIBLE);
-               hrtimer_start_expires(&t->timer, mode);
-               if (!hrtimer_active(&t->timer))
-                       t->task = NULL;
-
-               if (likely(t->task))
-                       freezable_schedule();
-
-               hrtimer_cancel(&t->timer);
-               mode = HRTIMER_MODE_ABS;
-
-       } while (t->task && !signal_pending(current));
-
-       __set_current_state(TASK_RUNNING);
-
-       return t->task == NULL;
-}
-
-static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
-{
-       struct timespec rmt;
-       ktime_t rem;
-
-       rem = hrtimer_expires_remaining(timer);
-       if (rem.tv64 <= 0)
-               return 0;
-       rmt = ktime_to_timespec(rem);
-
-       if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
-               return -EFAULT;
-
-       return 1;
-}
-
-long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
-{
-       struct hrtimer_sleeper t;
-       struct timespec __user  *rmtp;
-       int ret = 0;
-
-       hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-                               HRTIMER_MODE_ABS);
-       hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
-       if (do_nanosleep(&t, HRTIMER_MODE_ABS))
-               goto out;
-
-       rmtp = restart->nanosleep.rmtp;
-       if (rmtp) {
-               ret = update_rmtp(&t.timer, rmtp);
-               if (ret <= 0)
-                       goto out;
-       }
-
-       /* The other values in restart are already filled in */
-       ret = -ERESTART_RESTARTBLOCK;
-out:
-       destroy_hrtimer_on_stack(&t.timer);
-       return ret;
-}
-
-long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
-                      const enum hrtimer_mode mode, const clockid_t clockid)
-{
-       struct restart_block *restart;
-       struct hrtimer_sleeper t;
-       int ret = 0;
-       unsigned long slack;
-
-       slack = current->timer_slack_ns;
-       if (dl_task(current) || rt_task(current))
-               slack = 0;
-
-       hrtimer_init_on_stack(&t.timer, clockid, mode);
-       hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
-       if (do_nanosleep(&t, mode))
-               goto out;
-
-       /* Absolute timers do not update the rmtp value and restart: */
-       if (mode == HRTIMER_MODE_ABS) {
-               ret = -ERESTARTNOHAND;
-               goto out;
-       }
-
-       if (rmtp) {
-               ret = update_rmtp(&t.timer, rmtp);
-               if (ret <= 0)
-                       goto out;
-       }
-
-       restart = &current_thread_info()->restart_block;
-       restart->fn = hrtimer_nanosleep_restart;
-       restart->nanosleep.clockid = t.timer.base->clockid;
-       restart->nanosleep.rmtp = rmtp;
-       restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
-
-       ret = -ERESTART_RESTARTBLOCK;
-out:
-       destroy_hrtimer_on_stack(&t.timer);
-       return ret;
-}
-
-SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
-               struct timespec __user *, rmtp)
-{
-       struct timespec tu;
-
-       if (copy_from_user(&tu, rqtp, sizeof(tu)))
-               return -EFAULT;
-
-       if (!timespec_valid(&tu))
-               return -EINVAL;
-
-       return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
-}
-
-/*
- * Functions related to boot-time initialization:
- */
-static void init_hrtimers_cpu(int cpu)
-{
-       struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
-       int i;
-
-       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-               cpu_base->clock_base[i].cpu_base = cpu_base;
-               timerqueue_init_head(&cpu_base->clock_base[i].active);
-       }
-
-       hrtimer_init_hres(cpu_base);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
-                               struct hrtimer_clock_base *new_base)
-{
-       struct hrtimer *timer;
-       struct timerqueue_node *node;
-
-       while ((node = timerqueue_getnext(&old_base->active))) {
-               timer = container_of(node, struct hrtimer, node);
-               BUG_ON(hrtimer_callback_running(timer));
-               debug_deactivate(timer);
-
-               /*
-                * Mark it as STATE_MIGRATE not INACTIVE otherwise the
-                * timer could be seen as !active and just vanish away
-                * under us on another CPU
-                */
-               __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
-               timer->base = new_base;
-               /*
-                * Enqueue the timers on the new cpu. This does not
-                * reprogram the event device in case the timer
-                * expires before the earliest on this CPU, but we run
-                * hrtimer_interrupt after we migrated everything to
-                * sort out already expired timers and reprogram the
-                * event device.
-                */
-               enqueue_hrtimer(timer, new_base);
-
-               /* Clear the migration state bit */
-               timer->state &= ~HRTIMER_STATE_MIGRATE;
-       }
-}
-
-static void migrate_hrtimers(int scpu)
-{
-       struct hrtimer_cpu_base *old_base, *new_base;
-       int i;
-
-       BUG_ON(cpu_online(scpu));
-       tick_cancel_sched_timer(scpu);
-
-       local_irq_disable();
-       old_base = &per_cpu(hrtimer_bases, scpu);
-       new_base = &__get_cpu_var(hrtimer_bases);
-       /*
-        * The caller is globally serialized and nobody else
-        * takes two locks at once, deadlock is not possible.
-        */
-       raw_spin_lock(&new_base->lock);
-       raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
-
-       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-               migrate_hrtimer_list(&old_base->clock_base[i],
-                                    &new_base->clock_base[i]);
-       }
-
-       raw_spin_unlock(&old_base->lock);
-       raw_spin_unlock(&new_base->lock);
-
-       /* Check, if we got expired work to do */
-       __hrtimer_peek_ahead_timers();
-       local_irq_enable();
-}
-
-#endif /* CONFIG_HOTPLUG_CPU */
-
-static int hrtimer_cpu_notify(struct notifier_block *self,
-                                       unsigned long action, void *hcpu)
-{
-       int scpu = (long)hcpu;
-
-       switch (action) {
-
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               init_hrtimers_cpu(scpu);
-               break;
-
-#ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DYING:
-       case CPU_DYING_FROZEN:
-               clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-       {
-               clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
-               migrate_hrtimers(scpu);
-               break;
-       }
-#endif
-
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block hrtimers_nb = {
-       .notifier_call = hrtimer_cpu_notify,
-};
-
-void __init hrtimers_init(void)
-{
-       hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
-                         (void *)(long)smp_processor_id());
-       register_cpu_notifier(&hrtimers_nb);
-#ifdef CONFIG_HIGH_RES_TIMERS
-       open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
-#endif
-}
-
-/**
- * schedule_hrtimeout_range_clock - sleep until timeout
- * @expires:   timeout value (ktime_t)
- * @delta:     slack in expires timeout (ktime_t)
- * @mode:      timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- * @clock:     timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
- */
-int __sched
-schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
-                              const enum hrtimer_mode mode, int clock)
-{
-       struct hrtimer_sleeper t;
-
-       /*
-        * Optimize when a zero timeout value is given. It does not
-        * matter whether this is an absolute or a relative time.
-        */
-       if (expires && !expires->tv64) {
-               __set_current_state(TASK_RUNNING);
-               return 0;
-       }
-
-       /*
-        * A NULL parameter means "infinite"
-        */
-       if (!expires) {
-               schedule();
-               __set_current_state(TASK_RUNNING);
-               return -EINTR;
-       }
-
-       hrtimer_init_on_stack(&t.timer, clock, mode);
-       hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-
-       hrtimer_init_sleeper(&t, current);
-
-       hrtimer_start_expires(&t.timer, mode);
-       if (!hrtimer_active(&t.timer))
-               t.task = NULL;
-
-       if (likely(t.task))
-               schedule();
-
-       hrtimer_cancel(&t.timer);
-       destroy_hrtimer_on_stack(&t.timer);
-
-       __set_current_state(TASK_RUNNING);
-
-       return !t.task ? 0 : -EINTR;
-}
-
-/**
- * schedule_hrtimeout_range - sleep until timeout
- * @expires:   timeout value (ktime_t)
- * @delta:     slack in expires timeout (ktime_t)
- * @mode:      timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- *
- * Make the current task sleep until the given expiry time has
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * The @delta argument gives the kernel the freedom to schedule the
- * actual wakeup to a time that is both power and performance friendly.
- * The kernel give the normal best effort behavior for "@expires+@delta",
- * but may decide to fire the timer earlier, but no earlier than @expires.
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Returns 0 when the timer has expired otherwise -EINTR
- */
-int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
-                                    const enum hrtimer_mode mode)
-{
-       return schedule_hrtimeout_range_clock(expires, delta, mode,
-                                             CLOCK_MONOTONIC);
-}
-EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
-
-/**
- * schedule_hrtimeout - sleep until timeout
- * @expires:   timeout value (ktime_t)
- * @mode:      timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- *
- * Make the current task sleep until the given expiry time has
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Returns 0 when the timer has expired otherwise -EINTR
- */
-int __sched schedule_hrtimeout(ktime_t *expires,
-                              const enum hrtimer_mode mode)
-{
-       return schedule_hrtimeout_range(expires, 0, mode);
-}
-EXPORT_SYMBOL_GPL(schedule_hrtimeout);
diff --git a/kernel/itimer.c b/kernel/itimer.c
deleted file mode 100644 (file)
index 8d262b4..0000000
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * linux/kernel/itimer.c
- *
- * Copyright (C) 1992 Darren Senn
- */
-
-/* These are all the functions necessary to implement itimers */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/syscalls.h>
-#include <linux/time.h>
-#include <linux/posix-timers.h>
-#include <linux/hrtimer.h>
-#include <trace/events/timer.h>
-
-#include <asm/uaccess.h>
-
-/**
- * itimer_get_remtime - get remaining time for the timer
- *
- * @timer: the timer to read
- *
- * Returns the delta between the expiry time and now, which can be
- * less than zero or 1usec for an pending expired timer
- */
-static struct timeval itimer_get_remtime(struct hrtimer *timer)
-{
-       ktime_t rem = hrtimer_get_remaining(timer);
-
-       /*
-        * Racy but safe: if the itimer expires after the above
-        * hrtimer_get_remtime() call but before this condition
-        * then we return 0 - which is correct.
-        */
-       if (hrtimer_active(timer)) {
-               if (rem.tv64 <= 0)
-                       rem.tv64 = NSEC_PER_USEC;
-       } else
-               rem.tv64 = 0;
-
-       return ktime_to_timeval(rem);
-}
-
-static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
-                          struct itimerval *const value)
-{
-       cputime_t cval, cinterval;
-       struct cpu_itimer *it = &tsk->signal->it[clock_id];
-
-       spin_lock_irq(&tsk->sighand->siglock);
-
-       cval = it->expires;
-       cinterval = it->incr;
-       if (cval) {
-               struct task_cputime cputime;
-               cputime_t t;
-
-               thread_group_cputimer(tsk, &cputime);
-               if (clock_id == CPUCLOCK_PROF)
-                       t = cputime.utime + cputime.stime;
-               else
-                       /* CPUCLOCK_VIRT */
-                       t = cputime.utime;
-
-               if (cval < t)
-                       /* about to fire */
-                       cval = cputime_one_jiffy;
-               else
-                       cval = cval - t;
-       }
-
-       spin_unlock_irq(&tsk->sighand->siglock);
-
-       cputime_to_timeval(cval, &value->it_value);
-       cputime_to_timeval(cinterval, &value->it_interval);
-}
-
-int do_getitimer(int which, struct itimerval *value)
-{
-       struct task_struct *tsk = current;
-
-       switch (which) {
-       case ITIMER_REAL:
-               spin_lock_irq(&tsk->sighand->siglock);
-               value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
-               value->it_interval =
-                       ktime_to_timeval(tsk->signal->it_real_incr);
-               spin_unlock_irq(&tsk->sighand->siglock);
-               break;
-       case ITIMER_VIRTUAL:
-               get_cpu_itimer(tsk, CPUCLOCK_VIRT, value);
-               break;
-       case ITIMER_PROF:
-               get_cpu_itimer(tsk, CPUCLOCK_PROF, value);
-               break;
-       default:
-               return(-EINVAL);
-       }
-       return 0;
-}
-
-SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value)
-{
-       int error = -EFAULT;
-       struct itimerval get_buffer;
-
-       if (value) {
-               error = do_getitimer(which, &get_buffer);
-               if (!error &&
-                   copy_to_user(value, &get_buffer, sizeof(get_buffer)))
-                       error = -EFAULT;
-       }
-       return error;
-}
-
-
-/*
- * The timer is automagically restarted, when interval != 0
- */
-enum hrtimer_restart it_real_fn(struct hrtimer *timer)
-{
-       struct signal_struct *sig =
-               container_of(timer, struct signal_struct, real_timer);
-
-       trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
-       kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
-
-       return HRTIMER_NORESTART;
-}
-
-static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns)
-{
-       struct timespec ts;
-       s64 cpu_ns;
-
-       cputime_to_timespec(ct, &ts);
-       cpu_ns = timespec_to_ns(&ts);
-
-       return (cpu_ns <= real_ns) ? 0 : cpu_ns - real_ns;
-}
-
-static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
-                          const struct itimerval *const value,
-                          struct itimerval *const ovalue)
-{
-       cputime_t cval, nval, cinterval, ninterval;
-       s64 ns_ninterval, ns_nval;
-       u32 error, incr_error;
-       struct cpu_itimer *it = &tsk->signal->it[clock_id];
-
-       nval = timeval_to_cputime(&value->it_value);
-       ns_nval = timeval_to_ns(&value->it_value);
-       ninterval = timeval_to_cputime(&value->it_interval);
-       ns_ninterval = timeval_to_ns(&value->it_interval);
-
-       error = cputime_sub_ns(nval, ns_nval);
-       incr_error = cputime_sub_ns(ninterval, ns_ninterval);
-
-       spin_lock_irq(&tsk->sighand->siglock);
-
-       cval = it->expires;
-       cinterval = it->incr;
-       if (cval || nval) {
-               if (nval > 0)
-                       nval += cputime_one_jiffy;
-               set_process_cpu_timer(tsk, clock_id, &nval, &cval);
-       }
-       it->expires = nval;
-       it->incr = ninterval;
-       it->error = error;
-       it->incr_error = incr_error;
-       trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
-                          ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
-
-       spin_unlock_irq(&tsk->sighand->siglock);
-
-       if (ovalue) {
-               cputime_to_timeval(cval, &ovalue->it_value);
-               cputime_to_timeval(cinterval, &ovalue->it_interval);
-       }
-}
-
-/*
- * Returns true if the timeval is in canonical form
- */
-#define timeval_valid(t) \
-       (((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))
-
-int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
-{
-       struct task_struct *tsk = current;
-       struct hrtimer *timer;
-       ktime_t expires;
-
-       /*
-        * Validate the timevals in value.
-        */
-       if (!timeval_valid(&value->it_value) ||
-           !timeval_valid(&value->it_interval))
-               return -EINVAL;
-
-       switch (which) {
-       case ITIMER_REAL:
-again:
-               spin_lock_irq(&tsk->sighand->siglock);
-               timer = &tsk->signal->real_timer;
-               if (ovalue) {
-                       ovalue->it_value = itimer_get_remtime(timer);
-                       ovalue->it_interval
-                               = ktime_to_timeval(tsk->signal->it_real_incr);
-               }
-               /* We are sharing ->siglock with it_real_fn() */
-               if (hrtimer_try_to_cancel(timer) < 0) {
-                       spin_unlock_irq(&tsk->sighand->siglock);
-                       goto again;
-               }
-               expires = timeval_to_ktime(value->it_value);
-               if (expires.tv64 != 0) {
-                       tsk->signal->it_real_incr =
-                               timeval_to_ktime(value->it_interval);
-                       hrtimer_start(timer, expires, HRTIMER_MODE_REL);
-               } else
-                       tsk->signal->it_real_incr.tv64 = 0;
-
-               trace_itimer_state(ITIMER_REAL, value, 0);
-               spin_unlock_irq(&tsk->sighand->siglock);
-               break;
-       case ITIMER_VIRTUAL:
-               set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue);
-               break;
-       case ITIMER_PROF:
-               set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue);
-               break;
-       default:
-               return -EINVAL;
-       }
-       return 0;
-}
-
-/**
- * alarm_setitimer - set alarm in seconds
- *
- * @seconds:   number of seconds until alarm
- *             0 disables the alarm
- *
- * Returns the remaining time in seconds of a pending timer or 0 when
- * the timer is not active.
- *
- * On 32 bit machines the seconds value is limited to (INT_MAX/2) to avoid
- * negative timeval settings which would cause immediate expiry.
- */
-unsigned int alarm_setitimer(unsigned int seconds)
-{
-       struct itimerval it_new, it_old;
-
-#if BITS_PER_LONG < 64
-       if (seconds > INT_MAX)
-               seconds = INT_MAX;
-#endif
-       it_new.it_value.tv_sec = seconds;
-       it_new.it_value.tv_usec = 0;
-       it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
-
-       do_setitimer(ITIMER_REAL, &it_new, &it_old);
-
-       /*
-        * We can't return 0 if we have an alarm pending ...  And we'd
-        * better return too much than too little anyway
-        */
-       if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) ||
-             it_old.it_value.tv_usec >= 500000)
-               it_old.it_value.tv_sec++;
-
-       return it_old.it_value.tv_sec;
-}
-
-SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
-               struct itimerval __user *, ovalue)
-{
-       struct itimerval set_buffer, get_buffer;
-       int error;
-
-       if (value) {
-               if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
-                       return -EFAULT;
-       } else {
-               memset(&set_buffer, 0, sizeof(set_buffer));
-               printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
-                           " Misfeature support will be removed\n",
-                           current->comm);
-       }
-
-       error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
-       if (error || !ovalue)
-               return error;
-
-       if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
-               return -EFAULT;
-       return 0;
-}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
deleted file mode 100644 (file)
index 3b89464..0000000
+++ /dev/null
@@ -1,1490 +0,0 @@
-/*
- * Implement CPU time clocks for the POSIX clock interface.
- */
-
-#include <linux/sched.h>
-#include <linux/posix-timers.h>
-#include <linux/errno.h>
-#include <linux/math64.h>
-#include <asm/uaccess.h>
-#include <linux/kernel_stat.h>
-#include <trace/events/timer.h>
-#include <linux/random.h>
-#include <linux/tick.h>
-#include <linux/workqueue.h>
-
-/*
- * Called after updating RLIMIT_CPU to run cpu timer and update
- * tsk->signal->cputime_expires expiration cache if necessary. Needs
- * siglock protection since other code may update expiration cache as
- * well.
- */
-void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
-{
-       cputime_t cputime = secs_to_cputime(rlim_new);
-
-       spin_lock_irq(&task->sighand->siglock);
-       set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
-       spin_unlock_irq(&task->sighand->siglock);
-}
-
-static int check_clock(const clockid_t which_clock)
-{
-       int error = 0;
-       struct task_struct *p;
-       const pid_t pid = CPUCLOCK_PID(which_clock);
-
-       if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
-               return -EINVAL;
-
-       if (pid == 0)
-               return 0;
-
-       rcu_read_lock();
-       p = find_task_by_vpid(pid);
-       if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
-                  same_thread_group(p, current) : has_group_leader_pid(p))) {
-               error = -EINVAL;
-       }
-       rcu_read_unlock();
-
-       return error;
-}
-
-static inline unsigned long long
-timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
-{
-       unsigned long long ret;
-
-       ret = 0;                /* high half always zero when .cpu used */
-       if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
-               ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
-       } else {
-               ret = cputime_to_expires(timespec_to_cputime(tp));
-       }
-       return ret;
-}
-
-static void sample_to_timespec(const clockid_t which_clock,
-                              unsigned long long expires,
-                              struct timespec *tp)
-{
-       if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
-               *tp = ns_to_timespec(expires);
-       else
-               cputime_to_timespec((__force cputime_t)expires, tp);
-}
-
-/*
- * Update expiry time from increment, and increase overrun count,
- * given the current clock sample.
- */
-static void bump_cpu_timer(struct k_itimer *timer,
-                          unsigned long long now)
-{
-       int i;
-       unsigned long long delta, incr;
-
-       if (timer->it.cpu.incr == 0)
-               return;
-
-       if (now < timer->it.cpu.expires)
-               return;
-
-       incr = timer->it.cpu.incr;
-       delta = now + incr - timer->it.cpu.expires;
-
-       /* Don't use (incr*2 < delta), incr*2 might overflow. */
-       for (i = 0; incr < delta - incr; i++)
-               incr = incr << 1;
-
-       for (; i >= 0; incr >>= 1, i--) {
-               if (delta < incr)
-                       continue;
-
-               timer->it.cpu.expires += incr;
-               timer->it_overrun += 1 << i;
-               delta -= incr;
-       }
-}
-
-/**
- * task_cputime_zero - Check a task_cputime struct for all zero fields.
- *
- * @cputime:   The struct to compare.
- *
- * Checks @cputime to see if all fields are zero.  Returns true if all fields
- * are zero, false if any field is nonzero.
- */
-static inline int task_cputime_zero(const struct task_cputime *cputime)
-{
-       if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
-               return 1;
-       return 0;
-}
-
-static inline unsigned long long prof_ticks(struct task_struct *p)
-{
-       cputime_t utime, stime;
-
-       task_cputime(p, &utime, &stime);
-
-       return cputime_to_expires(utime + stime);
-}
-static inline unsigned long long virt_ticks(struct task_struct *p)
-{
-       cputime_t utime;
-
-       task_cputime(p, &utime, NULL);
-
-       return cputime_to_expires(utime);
-}
-
-static int
-posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
-{
-       int error = check_clock(which_clock);
-       if (!error) {
-               tp->tv_sec = 0;
-               tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
-               if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
-                       /*
-                        * If sched_clock is using a cycle counter, we
-                        * don't have any idea of its true resolution
-                        * exported, but it is much more than 1s/HZ.
-                        */
-                       tp->tv_nsec = 1;
-               }
-       }
-       return error;
-}
-
-static int
-posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
-{
-       /*
-        * You can never reset a CPU clock, but we check for other errors
-        * in the call before failing with EPERM.
-        */
-       int error = check_clock(which_clock);
-       if (error == 0) {
-               error = -EPERM;
-       }
-       return error;
-}
-
-
-/*
- * Sample a per-thread clock for the given task.
- */
-static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
-                           unsigned long long *sample)
-{
-       switch (CPUCLOCK_WHICH(which_clock)) {
-       default:
-               return -EINVAL;
-       case CPUCLOCK_PROF:
-               *sample = prof_ticks(p);
-               break;
-       case CPUCLOCK_VIRT:
-               *sample = virt_ticks(p);
-               break;
-       case CPUCLOCK_SCHED:
-               *sample = task_sched_runtime(p);
-               break;
-       }
-       return 0;
-}
-
-static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
-{
-       if (b->utime > a->utime)
-               a->utime = b->utime;
-
-       if (b->stime > a->stime)
-               a->stime = b->stime;
-
-       if (b->sum_exec_runtime > a->sum_exec_runtime)
-               a->sum_exec_runtime = b->sum_exec_runtime;
-}
-
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
-{
-       struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
-       struct task_cputime sum;
-       unsigned long flags;
-
-       if (!cputimer->running) {
-               /*
-                * The POSIX timer interface allows for absolute time expiry
-                * values through the TIMER_ABSTIME flag, therefore we have
-                * to synchronize the timer to the clock every time we start
-                * it.
-                */
-               thread_group_cputime(tsk, &sum);
-               raw_spin_lock_irqsave(&cputimer->lock, flags);
-               cputimer->running = 1;
-               update_gt_cputime(&cputimer->cputime, &sum);
-       } else
-               raw_spin_lock_irqsave(&cputimer->lock, flags);
-       *times = cputimer->cputime;
-       raw_spin_unlock_irqrestore(&cputimer->lock, flags);
-}
-
-/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with task sighand lock held for safe while_each_thread()
- * traversal.
- */
-static int cpu_clock_sample_group(const clockid_t which_clock,
-                                 struct task_struct *p,
-                                 unsigned long long *sample)
-{
-       struct task_cputime cputime;
-
-       switch (CPUCLOCK_WHICH(which_clock)) {
-       default:
-               return -EINVAL;
-       case CPUCLOCK_PROF:
-               thread_group_cputime(p, &cputime);
-               *sample = cputime_to_expires(cputime.utime + cputime.stime);
-               break;
-       case CPUCLOCK_VIRT:
-               thread_group_cputime(p, &cputime);
-               *sample = cputime_to_expires(cputime.utime);
-               break;
-       case CPUCLOCK_SCHED:
-               thread_group_cputime(p, &cputime);
-               *sample = cputime.sum_exec_runtime;
-               break;
-       }
-       return 0;
-}
-
-static int posix_cpu_clock_get_task(struct task_struct *tsk,
-                                   const clockid_t which_clock,
-                                   struct timespec *tp)
-{
-       int err = -EINVAL;
-       unsigned long long rtn;
-
-       if (CPUCLOCK_PERTHREAD(which_clock)) {
-               if (same_thread_group(tsk, current))
-                       err = cpu_clock_sample(which_clock, tsk, &rtn);
-       } else {
-               unsigned long flags;
-               struct sighand_struct *sighand;
-
-               /*
-                * while_each_thread() is not yet entirely RCU safe,
-                * keep locking the group while sampling process
-                * clock for now.
-                */
-               sighand = lock_task_sighand(tsk, &flags);
-               if (!sighand)
-                       return err;
-
-               if (tsk == current || thread_group_leader(tsk))
-                       err = cpu_clock_sample_group(which_clock, tsk, &rtn);
-
-               unlock_task_sighand(tsk, &flags);
-       }
-
-       if (!err)
-               sample_to_timespec(which_clock, rtn, tp);
-
-       return err;
-}
-
-
-static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
-{
-       const pid_t pid = CPUCLOCK_PID(which_clock);
-       int err = -EINVAL;
-
-       if (pid == 0) {
-               /*
-                * Special case constant value for our own clocks.
-                * We don't have to do any lookup to find ourselves.
-                */
-               err = posix_cpu_clock_get_task(current, which_clock, tp);
-       } else {
-               /*
-                * Find the given PID, and validate that the caller
-                * should be able to see it.
-                */
-               struct task_struct *p;
-               rcu_read_lock();
-               p = find_task_by_vpid(pid);
-               if (p)
-                       err = posix_cpu_clock_get_task(p, which_clock, tp);
-               rcu_read_unlock();
-       }
-
-       return err;
-}
-
-
-/*
- * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
- * This is called from sys_timer_create() and do_cpu_nanosleep() with the
- * new timer already all-zeros initialized.
- */
-static int posix_cpu_timer_create(struct k_itimer *new_timer)
-{
-       int ret = 0;
-       const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
-       struct task_struct *p;
-
-       if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
-               return -EINVAL;
-
-       INIT_LIST_HEAD(&new_timer->it.cpu.entry);
-
-       rcu_read_lock();
-       if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
-               if (pid == 0) {
-                       p = current;
-               } else {
-                       p = find_task_by_vpid(pid);
-                       if (p && !same_thread_group(p, current))
-                               p = NULL;
-               }
-       } else {
-               if (pid == 0) {
-                       p = current->group_leader;
-               } else {
-                       p = find_task_by_vpid(pid);
-                       if (p && !has_group_leader_pid(p))
-                               p = NULL;
-               }
-       }
-       new_timer->it.cpu.task = p;
-       if (p) {
-               get_task_struct(p);
-       } else {
-               ret = -EINVAL;
-       }
-       rcu_read_unlock();
-
-       return ret;
-}
-
-/*
- * Clean up a CPU-clock timer that is about to be destroyed.
- * This is called from timer deletion with the timer already locked.
- * If we return TIMER_RETRY, it's necessary to release the timer's lock
- * and try again.  (This happens when the timer is in the middle of firing.)
- */
-static int posix_cpu_timer_del(struct k_itimer *timer)
-{
-       int ret = 0;
-       unsigned long flags;
-       struct sighand_struct *sighand;
-       struct task_struct *p = timer->it.cpu.task;
-
-       WARN_ON_ONCE(p == NULL);
-
-       /*
-        * Protect against sighand release/switch in exit/exec and process/
-        * thread timer list entry concurrent read/writes.
-        */
-       sighand = lock_task_sighand(p, &flags);
-       if (unlikely(sighand == NULL)) {
-               /*
-                * We raced with the reaping of the task.
-                * The deletion should have cleared us off the list.
-                */
-               WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
-       } else {
-               if (timer->it.cpu.firing)
-                       ret = TIMER_RETRY;
-               else
-                       list_del(&timer->it.cpu.entry);
-
-               unlock_task_sighand(p, &flags);
-       }
-
-       if (!ret)
-               put_task_struct(p);
-
-       return ret;
-}
-
-static void cleanup_timers_list(struct list_head *head)
-{
-       struct cpu_timer_list *timer, *next;
-
-       list_for_each_entry_safe(timer, next, head, entry)
-               list_del_init(&timer->entry);
-}
-
-/*
- * Clean out CPU timers still ticking when a thread exited.  The task
- * pointer is cleared, and the expiry time is replaced with the residual
- * time for later timer_gettime calls to return.
- * This must be called with the siglock held.
- */
-static void cleanup_timers(struct list_head *head)
-{
-       cleanup_timers_list(head);
-       cleanup_timers_list(++head);
-       cleanup_timers_list(++head);
-}
-
-/*
- * These are both called with the siglock held, when the current thread
- * is being reaped.  When the final (leader) thread in the group is reaped,
- * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
- */
-void posix_cpu_timers_exit(struct task_struct *tsk)
-{
-       add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
-                                               sizeof(unsigned long long));
-       cleanup_timers(tsk->cpu_timers);
-
-}
-void posix_cpu_timers_exit_group(struct task_struct *tsk)
-{
-       cleanup_timers(tsk->signal->cpu_timers);
-}
-
-static inline int expires_gt(cputime_t expires, cputime_t new_exp)
-{
-       return expires == 0 || expires > new_exp;
-}
-
-/*
- * Insert the timer on the appropriate list before any timers that
- * expire later.  This must be called with the sighand lock held.
- */
-static void arm_timer(struct k_itimer *timer)
-{
-       struct task_struct *p = timer->it.cpu.task;
-       struct list_head *head, *listpos;
-       struct task_cputime *cputime_expires;
-       struct cpu_timer_list *const nt = &timer->it.cpu;
-       struct cpu_timer_list *next;
-
-       if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-               head = p->cpu_timers;
-               cputime_expires = &p->cputime_expires;
-       } else {
-               head = p->signal->cpu_timers;
-               cputime_expires = &p->signal->cputime_expires;
-       }
-       head += CPUCLOCK_WHICH(timer->it_clock);
-
-       listpos = head;
-       list_for_each_entry(next, head, entry) {
-               if (nt->expires < next->expires)
-                       break;
-               listpos = &next->entry;
-       }
-       list_add(&nt->entry, listpos);
-
-       if (listpos == head) {
-               unsigned long long exp = nt->expires;
-
-               /*
-                * We are the new earliest-expiring POSIX 1.b timer, hence
-                * need to update expiration cache. Take into account that
-                * for process timers we share expiration cache with itimers
-                * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
-                */
-
-               switch (CPUCLOCK_WHICH(timer->it_clock)) {
-               case CPUCLOCK_PROF:
-                       if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
-                               cputime_expires->prof_exp = expires_to_cputime(exp);
-                       break;
-               case CPUCLOCK_VIRT:
-                       if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
-                               cputime_expires->virt_exp = expires_to_cputime(exp);
-                       break;
-               case CPUCLOCK_SCHED:
-                       if (cputime_expires->sched_exp == 0 ||
-                           cputime_expires->sched_exp > exp)
-                               cputime_expires->sched_exp = exp;
-                       break;
-               }
-       }
-}
-
-/*
- * The timer is locked, fire it and arrange for its reload.
- */
-static void cpu_timer_fire(struct k_itimer *timer)
-{
-       if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
-               /*
-                * User don't want any signal.
-                */
-               timer->it.cpu.expires = 0;
-       } else if (unlikely(timer->sigq == NULL)) {
-               /*
-                * This a special case for clock_nanosleep,
-                * not a normal timer from sys_timer_create.
-                */
-               wake_up_process(timer->it_process);
-               timer->it.cpu.expires = 0;
-       } else if (timer->it.cpu.incr == 0) {
-               /*
-                * One-shot timer.  Clear it as soon as it's fired.
-                */
-               posix_timer_event(timer, 0);
-               timer->it.cpu.expires = 0;
-       } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
-               /*
-                * The signal did not get queued because the signal
-                * was ignored, so we won't get any callback to
-                * reload the timer.  But we need to keep it
-                * ticking in case the signal is deliverable next time.
-                */
-               posix_cpu_timer_schedule(timer);
-       }
-}
-
-/*
- * Sample a process (thread group) timer for the given group_leader task.
- * Must be called with task sighand lock held for safe while_each_thread()
- * traversal.
- */
-static int cpu_timer_sample_group(const clockid_t which_clock,
-                                 struct task_struct *p,
-                                 unsigned long long *sample)
-{
-       struct task_cputime cputime;
-
-       thread_group_cputimer(p, &cputime);
-       switch (CPUCLOCK_WHICH(which_clock)) {
-       default:
-               return -EINVAL;
-       case CPUCLOCK_PROF:
-               *sample = cputime_to_expires(cputime.utime + cputime.stime);
-               break;
-       case CPUCLOCK_VIRT:
-               *sample = cputime_to_expires(cputime.utime);
-               break;
-       case CPUCLOCK_SCHED:
-               *sample = cputime.sum_exec_runtime + task_delta_exec(p);
-               break;
-       }
-       return 0;
-}
-
-#ifdef CONFIG_NO_HZ_FULL
-static void nohz_kick_work_fn(struct work_struct *work)
-{
-       tick_nohz_full_kick_all();
-}
-
-static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
-
-/*
- * We need the IPIs to be sent from sane process context.
- * The posix cpu timers are always set with irqs disabled.
- */
-static void posix_cpu_timer_kick_nohz(void)
-{
-       if (context_tracking_is_enabled())
-               schedule_work(&nohz_kick_work);
-}
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
-{
-       if (!task_cputime_zero(&tsk->cputime_expires))
-               return false;
-
-       if (tsk->signal->cputimer.running)
-               return false;
-
-       return true;
-}
-#else
-static inline void posix_cpu_timer_kick_nohz(void) { }
-#endif
-
-/*
- * Guts of sys_timer_settime for CPU timers.
- * This is called with the timer locked and interrupts disabled.
- * If we return TIMER_RETRY, it's necessary to release the timer's lock
- * and try again.  (This happens when the timer is in the middle of firing.)
- */
-static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
-                              struct itimerspec *new, struct itimerspec *old)
-{
-       unsigned long flags;
-       struct sighand_struct *sighand;
-       struct task_struct *p = timer->it.cpu.task;
-       unsigned long long old_expires, new_expires, old_incr, val;
-       int ret;
-
-       WARN_ON_ONCE(p == NULL);
-
-       new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
-
-       /*
-        * Protect against sighand release/switch in exit/exec and p->cpu_timers
-        * and p->signal->cpu_timers read/write in arm_timer()
-        */
-       sighand = lock_task_sighand(p, &flags);
-       /*
-        * If p has just been reaped, we can no
-        * longer get any information about it at all.
-        */
-       if (unlikely(sighand == NULL)) {
-               return -ESRCH;
-       }
-
-       /*
-        * Disarm any old timer after extracting its expiry time.
-        */
-       WARN_ON_ONCE(!irqs_disabled());
-
-       ret = 0;
-       old_incr = timer->it.cpu.incr;
-       old_expires = timer->it.cpu.expires;
-       if (unlikely(timer->it.cpu.firing)) {
-               timer->it.cpu.firing = -1;
-               ret = TIMER_RETRY;
-       } else
-               list_del_init(&timer->it.cpu.entry);
-
-       /*
-        * We need to sample the current value to convert the new
-        * value from to relative and absolute, and to convert the
-        * old value from absolute to relative.  To set a process
-        * timer, we need a sample to balance the thread expiry
-        * times (in arm_timer).  With an absolute time, we must
-        * check if it's already passed.  In short, we need a sample.
-        */
-       if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-               cpu_clock_sample(timer->it_clock, p, &val);
-       } else {
-               cpu_timer_sample_group(timer->it_clock, p, &val);
-       }
-
-       if (old) {
-               if (old_expires == 0) {
-                       old->it_value.tv_sec = 0;
-                       old->it_value.tv_nsec = 0;
-               } else {
-                       /*
-                        * Update the timer in case it has
-                        * overrun already.  If it has,
-                        * we'll report it as having overrun
-                        * and with the next reloaded timer
-                        * already ticking, though we are
-                        * swallowing that pending
-                        * notification here to install the
-                        * new setting.
-                        */
-                       bump_cpu_timer(timer, val);
-                       if (val < timer->it.cpu.expires) {
-                               old_expires = timer->it.cpu.expires - val;
-                               sample_to_timespec(timer->it_clock,
-                                                  old_expires,
-                                                  &old->it_value);
-                       } else {
-                               old->it_value.tv_nsec = 1;
-                               old->it_value.tv_sec = 0;
-                       }
-               }
-       }
-
-       if (unlikely(ret)) {
-               /*
-                * We are colliding with the timer actually firing.
-                * Punt after filling in the timer's old value, and
-                * disable this firing since we are already reporting
-                * it as an overrun (thanks to bump_cpu_timer above).
-                */
-               unlock_task_sighand(p, &flags);
-               goto out;
-       }
-
-       if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
-               new_expires += val;
-       }
-
-       /*
-        * Install the new expiry time (or zero).
-        * For a timer with no notification action, we don't actually
-        * arm the timer (we'll just fake it for timer_gettime).
-        */
-       timer->it.cpu.expires = new_expires;
-       if (new_expires != 0 && val < new_expires) {
-               arm_timer(timer);
-       }
-
-       unlock_task_sighand(p, &flags);
-       /*
-        * Install the new reload setting, and
-        * set up the signal and overrun bookkeeping.
-        */
-       timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
-                                               &new->it_interval);
-
-       /*
-        * This acts as a modification timestamp for the timer,
-        * so any automatic reload attempt will punt on seeing
-        * that we have reset the timer manually.
-        */
-       timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
-               ~REQUEUE_PENDING;
-       timer->it_overrun_last = 0;
-       timer->it_overrun = -1;
-
-       if (new_expires != 0 && !(val < new_expires)) {
-               /*
-                * The designated time already passed, so we notify
-                * immediately, even if the thread never runs to
-                * accumulate more time on this clock.
-                */
-               cpu_timer_fire(timer);
-       }
-
-       ret = 0;
- out:
-       if (old) {
-               sample_to_timespec(timer->it_clock,
-                                  old_incr, &old->it_interval);
-       }
-       if (!ret)
-               posix_cpu_timer_kick_nohz();
-       return ret;
-}
-
-static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
-{
-       unsigned long long now;
-       struct task_struct *p = timer->it.cpu.task;
-
-       WARN_ON_ONCE(p == NULL);
-
-       /*
-        * Easy part: convert the reload time.
-        */
-       sample_to_timespec(timer->it_clock,
-                          timer->it.cpu.incr, &itp->it_interval);
-
-       if (timer->it.cpu.expires == 0) {       /* Timer not armed at all.  */
-               itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
-               return;
-       }
-
-       /*
-        * Sample the clock to take the difference with the expiry time.
-        */
-       if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-               cpu_clock_sample(timer->it_clock, p, &now);
-       } else {
-               struct sighand_struct *sighand;
-               unsigned long flags;
-
-               /*
-                * Protect against sighand release/switch in exit/exec and
-                * also make timer sampling safe if it ends up calling
-                * thread_group_cputime().
-                */
-               sighand = lock_task_sighand(p, &flags);
-               if (unlikely(sighand == NULL)) {
-                       /*
-                        * The process has been reaped.
-                        * We can't even collect a sample any more.
-                        * Call the timer disarmed, nothing else to do.
-                        */
-                       timer->it.cpu.expires = 0;
-                       sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
-                                          &itp->it_value);
-               } else {
-                       cpu_timer_sample_group(timer->it_clock, p, &now);
-                       unlock_task_sighand(p, &flags);
-               }
-       }
-
-       if (now < timer->it.cpu.expires) {
-               sample_to_timespec(timer->it_clock,
-                                  timer->it.cpu.expires - now,
-                                  &itp->it_value);
-       } else {
-               /*
-                * The timer should have expired already, but the firing
-                * hasn't taken place yet.  Say it's just about to expire.
-                */
-               itp->it_value.tv_nsec = 1;
-               itp->it_value.tv_sec = 0;
-       }
-}
-
-static unsigned long long
-check_timers_list(struct list_head *timers,
-                 struct list_head *firing,
-                 unsigned long long curr)
-{
-       int maxfire = 20;
-
-       while (!list_empty(timers)) {
-               struct cpu_timer_list *t;
-
-               t = list_first_entry(timers, struct cpu_timer_list, entry);
-
-               if (!--maxfire || curr < t->expires)
-                       return t->expires;
-
-               t->firing = 1;
-               list_move_tail(&t->entry, firing);
-       }
-
-       return 0;
-}
-
-/*
- * Check for any per-thread CPU timers that have fired and move them off
- * the tsk->cpu_timers[N] list onto the firing list.  Here we update the
- * tsk->it_*_expires values to reflect the remaining thread CPU timers.
- */
-static void check_thread_timers(struct task_struct *tsk,
-                               struct list_head *firing)
-{
-       struct list_head *timers = tsk->cpu_timers;
-       struct signal_struct *const sig = tsk->signal;
-       struct task_cputime *tsk_expires = &tsk->cputime_expires;
-       unsigned long long expires;
-       unsigned long soft;
-
-       expires = check_timers_list(timers, firing, prof_ticks(tsk));
-       tsk_expires->prof_exp = expires_to_cputime(expires);
-
-       expires = check_timers_list(++timers, firing, virt_ticks(tsk));
-       tsk_expires->virt_exp = expires_to_cputime(expires);
-
-       tsk_expires->sched_exp = check_timers_list(++timers, firing,
-                                                  tsk->se.sum_exec_runtime);
-
-       /*
-        * Check for the special case thread timers.
-        */
-       soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
-       if (soft != RLIM_INFINITY) {
-               unsigned long hard =
-                       ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
-
-               if (hard != RLIM_INFINITY &&
-                   tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
-                       /*
-                        * At the hard limit, we just die.
-                        * No need to calculate anything else now.
-                        */
-                       __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
-                       return;
-               }
-               if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
-                       /*
-                        * At the soft limit, send a SIGXCPU every second.
-                        */
-                       if (soft < hard) {
-                               soft += USEC_PER_SEC;
-                               sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
-                       }
-                       printk(KERN_INFO
-                               "RT Watchdog Timeout: %s[%d]\n",
-                               tsk->comm, task_pid_nr(tsk));
-                       __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
-               }
-       }
-}
-
-static void stop_process_timers(struct signal_struct *sig)
-{
-       struct thread_group_cputimer *cputimer = &sig->cputimer;
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&cputimer->lock, flags);
-       cputimer->running = 0;
-       raw_spin_unlock_irqrestore(&cputimer->lock, flags);
-}
-
-static u32 onecputick;
-
-static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
-                            unsigned long long *expires,
-                            unsigned long long cur_time, int signo)
-{
-       if (!it->expires)
-               return;
-
-       if (cur_time >= it->expires) {
-               if (it->incr) {
-                       it->expires += it->incr;
-                       it->error += it->incr_error;
-                       if (it->error >= onecputick) {
-                               it->expires -= cputime_one_jiffy;
-                               it->error -= onecputick;
-                       }
-               } else {
-                       it->expires = 0;
-               }
-
-               trace_itimer_expire(signo == SIGPROF ?
-                                   ITIMER_PROF : ITIMER_VIRTUAL,
-                                   tsk->signal->leader_pid, cur_time);
-               __group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
-       }
-
-       if (it->expires && (!*expires || it->expires < *expires)) {
-               *expires = it->expires;
-       }
-}
-
-/*
- * Check for any per-thread CPU timers that have fired and move them
- * off the tsk->*_timers list onto the firing list.  Per-thread timers
- * have already been taken off.
- */
-static void check_process_timers(struct task_struct *tsk,
-                                struct list_head *firing)
-{
-       struct signal_struct *const sig = tsk->signal;
-       unsigned long long utime, ptime, virt_expires, prof_expires;
-       unsigned long long sum_sched_runtime, sched_expires;
-       struct list_head *timers = sig->cpu_timers;
-       struct task_cputime cputime;
-       unsigned long soft;
-
-       /*
-        * Collect the current process totals.
-        */
-       thread_group_cputimer(tsk, &cputime);
-       utime = cputime_to_expires(cputime.utime);
-       ptime = utime + cputime_to_expires(cputime.stime);
-       sum_sched_runtime = cputime.sum_exec_runtime;
-
-       prof_expires = check_timers_list(timers, firing, ptime);
-       virt_expires = check_timers_list(++timers, firing, utime);
-       sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
-
-       /*
-        * Check for the special case process timers.
-        */
-       check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
-                        SIGPROF);
-       check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
-                        SIGVTALRM);
-       soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
-       if (soft != RLIM_INFINITY) {
-               unsigned long psecs = cputime_to_secs(ptime);
-               unsigned long hard =
-                       ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
-               cputime_t x;
-               if (psecs >= hard) {
-                       /*
-                        * At the hard limit, we just die.
-                        * No need to calculate anything else now.
-                        */
-                       __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
-                       return;
-               }
-               if (psecs >= soft) {
-                       /*
-                        * At the soft limit, send a SIGXCPU every second.
-                        */
-                       __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
-                       if (soft < hard) {
-                               soft++;
-                               sig->rlim[RLIMIT_CPU].rlim_cur = soft;
-                       }
-               }
-               x = secs_to_cputime(soft);
-               if (!prof_expires || x < prof_expires) {
-                       prof_expires = x;
-               }
-       }
-
-       sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
-       sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
-       sig->cputime_expires.sched_exp = sched_expires;
-       if (task_cputime_zero(&sig->cputime_expires))
-               stop_process_timers(sig);
-}
-
-/*
- * This is called from the signal code (via do_schedule_next_timer)
- * when the last timer signal was delivered and we have to reload the timer.
- */
-void posix_cpu_timer_schedule(struct k_itimer *timer)
-{
-       struct sighand_struct *sighand;
-       unsigned long flags;
-       struct task_struct *p = timer->it.cpu.task;
-       unsigned long long now;
-
-       WARN_ON_ONCE(p == NULL);
-
-       /*
-        * Fetch the current sample and update the timer's expiry time.
-        */
-       if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-               cpu_clock_sample(timer->it_clock, p, &now);
-               bump_cpu_timer(timer, now);
-               if (unlikely(p->exit_state))
-                       goto out;
-
-               /* Protect timer list r/w in arm_timer() */
-               sighand = lock_task_sighand(p, &flags);
-               if (!sighand)
-                       goto out;
-       } else {
-               /*
-                * Protect arm_timer() and timer sampling in case of call to
-                * thread_group_cputime().
-                */
-               sighand = lock_task_sighand(p, &flags);
-               if (unlikely(sighand == NULL)) {
-                       /*
-                        * The process has been reaped.
-                        * We can't even collect a sample any more.
-                        */
-                       timer->it.cpu.expires = 0;
-                       goto out;
-               } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
-                       unlock_task_sighand(p, &flags);
-                       /* Optimizations: if the process is dying, no need to rearm */
-                       goto out;
-               }
-               cpu_timer_sample_group(timer->it_clock, p, &now);
-               bump_cpu_timer(timer, now);
-               /* Leave the sighand locked for the call below.  */
-       }
-
-       /*
-        * Now re-arm for the new expiry time.
-        */
-       WARN_ON_ONCE(!irqs_disabled());
-       arm_timer(timer);
-       unlock_task_sighand(p, &flags);
-
-       /* Kick full dynticks CPUs in case they need to tick on the new timer */
-       posix_cpu_timer_kick_nohz();
-out:
-       timer->it_overrun_last = timer->it_overrun;
-       timer->it_overrun = -1;
-       ++timer->it_requeue_pending;
-}
-
-/**
- * task_cputime_expired - Compare two task_cputime entities.
- *
- * @sample:    The task_cputime structure to be checked for expiration.
- * @expires:   Expiration times, against which @sample will be checked.
- *
- * Checks @sample against @expires to see if any field of @sample has expired.
- * Returns true if any field of the former is greater than the corresponding
- * field of the latter if the latter field is set.  Otherwise returns false.
- */
-static inline int task_cputime_expired(const struct task_cputime *sample,
-                                       const struct task_cputime *expires)
-{
-       if (expires->utime && sample->utime >= expires->utime)
-               return 1;
-       if (expires->stime && sample->utime + sample->stime >= expires->stime)
-               return 1;
-       if (expires->sum_exec_runtime != 0 &&
-           sample->sum_exec_runtime >= expires->sum_exec_runtime)
-               return 1;
-       return 0;
-}
-
-/**
- * fastpath_timer_check - POSIX CPU timers fast path.
- *
- * @tsk:       The task (thread) being checked.
- *
- * Check the task and thread group timers.  If both are zero (there are no
- * timers set) return false.  Otherwise snapshot the task and thread group
- * timers and compare them with the corresponding expiration times.  Return
- * true if a timer has expired, else return false.
- */
-static inline int fastpath_timer_check(struct task_struct *tsk)
-{
-       struct signal_struct *sig;
-       cputime_t utime, stime;
-
-       task_cputime(tsk, &utime, &stime);
-
-       if (!task_cputime_zero(&tsk->cputime_expires)) {
-               struct task_cputime task_sample = {
-                       .utime = utime,
-                       .stime = stime,
-                       .sum_exec_runtime = tsk->se.sum_exec_runtime
-               };
-
-               if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
-                       return 1;
-       }
-
-       sig = tsk->signal;
-       if (sig->cputimer.running) {
-               struct task_cputime group_sample;
-
-               raw_spin_lock(&sig->cputimer.lock);
-               group_sample = sig->cputimer.cputime;
-               raw_spin_unlock(&sig->cputimer.lock);
-
-               if (task_cputime_expired(&group_sample, &sig->cputime_expires))
-                       return 1;
-       }
-
-       return 0;
-}
-
-/*
- * This is called from the timer interrupt handler.  The irq handler has
- * already updated our counts.  We need to check if any timers fire now.
- * Interrupts are disabled.
- */
-void run_posix_cpu_timers(struct task_struct *tsk)
-{
-       LIST_HEAD(firing);
-       struct k_itimer *timer, *next;
-       unsigned long flags;
-
-       WARN_ON_ONCE(!irqs_disabled());
-
-       /*
-        * The fast path checks that there are no expired thread or thread
-        * group timers.  If that's so, just return.
-        */
-       if (!fastpath_timer_check(tsk))
-               return;
-
-       if (!lock_task_sighand(tsk, &flags))
-               return;
-       /*
-        * Here we take off tsk->signal->cpu_timers[N] and
-        * tsk->cpu_timers[N] all the timers that are firing, and
-        * put them on the firing list.
-        */
-       check_thread_timers(tsk, &firing);
-       /*
-        * If there are any active process wide timers (POSIX 1.b, itimers,
-        * RLIMIT_CPU) cputimer must be running.
-        */
-       if (tsk->signal->cputimer.running)
-               check_process_timers(tsk, &firing);
-
-       /*
-        * We must release these locks before taking any timer's lock.
-        * There is a potential race with timer deletion here, as the
-        * siglock now protects our private firing list.  We have set
-        * the firing flag in each timer, so that a deletion attempt
-        * that gets the timer lock before we do will give it up and
-        * spin until we've taken care of that timer below.
-        */
-       unlock_task_sighand(tsk, &flags);
-
-       /*
-        * Now that all the timers on our list have the firing flag,
-        * no one will touch their list entries but us.  We'll take
-        * each timer's lock before clearing its firing flag, so no
-        * timer call will interfere.
-        */
-       list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
-               int cpu_firing;
-
-               spin_lock(&timer->it_lock);
-               list_del_init(&timer->it.cpu.entry);
-               cpu_firing = timer->it.cpu.firing;
-               timer->it.cpu.firing = 0;
-               /*
-                * The firing flag is -1 if we collided with a reset
-                * of the timer, which already reported this
-                * almost-firing as an overrun.  So don't generate an event.
-                */
-               if (likely(cpu_firing >= 0))
-                       cpu_timer_fire(timer);
-               spin_unlock(&timer->it_lock);
-       }
-}
-
-/*
- * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
- * The tsk->sighand->siglock must be held by the caller.
- */
-void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
-                          cputime_t *newval, cputime_t *oldval)
-{
-       unsigned long long now;
-
-       WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
-       cpu_timer_sample_group(clock_idx, tsk, &now);
-
-       if (oldval) {
-               /*
-                * We are setting itimer. The *oldval is absolute and we update
-                * it to be relative, *newval argument is relative and we update
-                * it to be absolute.
-                */
-               if (*oldval) {
-                       if (*oldval <= now) {
-                               /* Just about to fire. */
-                               *oldval = cputime_one_jiffy;
-                       } else {
-                               *oldval -= now;
-                       }
-               }
-
-               if (!*newval)
-                       goto out;
-               *newval += now;
-       }
-
-       /*
-        * Update expiration cache if we are the earliest timer, or eventually
-        * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
-        */
-       switch (clock_idx) {
-       case CPUCLOCK_PROF:
-               if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
-                       tsk->signal->cputime_expires.prof_exp = *newval;
-               break;
-       case CPUCLOCK_VIRT:
-               if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
-                       tsk->signal->cputime_expires.virt_exp = *newval;
-               break;
-       }
-out:
-       posix_cpu_timer_kick_nohz();
-}
-
-static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
-                           struct timespec *rqtp, struct itimerspec *it)
-{
-       struct k_itimer timer;
-       int error;
-
-       /*
-        * Set up a temporary timer and then wait for it to go off.
-        */
-       memset(&timer, 0, sizeof timer);
-       spin_lock_init(&timer.it_lock);
-       timer.it_clock = which_clock;
-       timer.it_overrun = -1;
-       error = posix_cpu_timer_create(&timer);
-       timer.it_process = current;
-       if (!error) {
-               static struct itimerspec zero_it;
-
-               memset(it, 0, sizeof *it);
-               it->it_value = *rqtp;
-
-               spin_lock_irq(&timer.it_lock);
-               error = posix_cpu_timer_set(&timer, flags, it, NULL);
-               if (error) {
-                       spin_unlock_irq(&timer.it_lock);
-                       return error;
-               }
-
-               while (!signal_pending(current)) {
-                       if (timer.it.cpu.expires == 0) {
-                               /*
-                                * Our timer fired and was reset, below
-                                * deletion can not fail.
-                                */
-                               posix_cpu_timer_del(&timer);
-                               spin_unlock_irq(&timer.it_lock);
-                               return 0;
-                       }
-
-                       /*
-                        * Block until cpu_timer_fire (or a signal) wakes us.
-                        */
-                       __set_current_state(TASK_INTERRUPTIBLE);
-                       spin_unlock_irq(&timer.it_lock);
-                       schedule();
-                       spin_lock_irq(&timer.it_lock);
-               }
-
-               /*
-                * We were interrupted by a signal.
-                */
-               sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
-               error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
-               if (!error) {
-                       /*
-                        * Timer is now unarmed, deletion can not fail.
-                        */
-                       posix_cpu_timer_del(&timer);
-               }
-               spin_unlock_irq(&timer.it_lock);
-
-               while (error == TIMER_RETRY) {
-                       /*
-                        * We need to handle case when timer was or is in the
-                        * middle of firing. In other cases we already freed
-                        * resources.
-                        */
-                       spin_lock_irq(&timer.it_lock);
-                       error = posix_cpu_timer_del(&timer);
-                       spin_unlock_irq(&timer.it_lock);
-               }
-
-               if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
-                       /*
-                        * It actually did fire already.
-                        */
-                       return 0;
-               }
-
-               error = -ERESTART_RESTARTBLOCK;
-       }
-
-       return error;
-}
-
-static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
-
-static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
-                           struct timespec *rqtp, struct timespec __user *rmtp)
-{
-       struct restart_block *restart_block =
-               &current_thread_info()->restart_block;
-       struct itimerspec it;
-       int error;
-
-       /*
-        * Diagnose required errors first.
-        */
-       if (CPUCLOCK_PERTHREAD(which_clock) &&
-           (CPUCLOCK_PID(which_clock) == 0 ||
-            CPUCLOCK_PID(which_clock) == current->pid))
-               return -EINVAL;
-
-       error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
-
-       if (error == -ERESTART_RESTARTBLOCK) {
-
-               if (flags & TIMER_ABSTIME)
-                       return -ERESTARTNOHAND;
-               /*
-                * Report back to the user the time still remaining.
-                */
-               if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
-                       return -EFAULT;
-
-               restart_block->fn = posix_cpu_nsleep_restart;
-               restart_block->nanosleep.clockid = which_clock;
-               restart_block->nanosleep.rmtp = rmtp;
-               restart_block->nanosleep.expires = timespec_to_ns(rqtp);
-       }
-       return error;
-}
-
-static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
-{
-       clockid_t which_clock = restart_block->nanosleep.clockid;
-       struct timespec t;
-       struct itimerspec it;
-       int error;
-
-       t = ns_to_timespec(restart_block->nanosleep.expires);
-
-       error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
-
-       if (error == -ERESTART_RESTARTBLOCK) {
-               struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
-               /*
-                * Report back to the user the time still remaining.
-                */
-               if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
-                       return -EFAULT;
-
-               restart_block->nanosleep.expires = timespec_to_ns(&t);
-       }
-       return error;
-
-}
-
-#define PROCESS_CLOCK  MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
-#define THREAD_CLOCK   MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
-
-static int process_cpu_clock_getres(const clockid_t which_clock,
-                                   struct timespec *tp)
-{
-       return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
-}
-static int process_cpu_clock_get(const clockid_t which_clock,
-                                struct timespec *tp)
-{
-       return posix_cpu_clock_get(PROCESS_CLOCK, tp);
-}
-static int process_cpu_timer_create(struct k_itimer *timer)
-{
-       timer->it_clock = PROCESS_CLOCK;
-       return posix_cpu_timer_create(timer);
-}
-static int process_cpu_nsleep(const clockid_t which_clock, int flags,
-                             struct timespec *rqtp,
-                             struct timespec __user *rmtp)
-{
-       return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
-}
-static long process_cpu_nsleep_restart(struct restart_block *restart_block)
-{
-       return -EINVAL;
-}
-static int thread_cpu_clock_getres(const clockid_t which_clock,
-                                  struct timespec *tp)
-{
-       return posix_cpu_clock_getres(THREAD_CLOCK, tp);
-}
-static int thread_cpu_clock_get(const clockid_t which_clock,
-                               struct timespec *tp)
-{
-       return posix_cpu_clock_get(THREAD_CLOCK, tp);
-}
-static int thread_cpu_timer_create(struct k_itimer *timer)
-{
-       timer->it_clock = THREAD_CLOCK;
-       return posix_cpu_timer_create(timer);
-}
-
-struct k_clock clock_posix_cpu = {
-       .clock_getres   = posix_cpu_clock_getres,
-       .clock_set      = posix_cpu_clock_set,
-       .clock_get      = posix_cpu_clock_get,
-       .timer_create   = posix_cpu_timer_create,
-       .nsleep         = posix_cpu_nsleep,
-       .nsleep_restart = posix_cpu_nsleep_restart,
-       .timer_set      = posix_cpu_timer_set,
-       .timer_del      = posix_cpu_timer_del,
-       .timer_get      = posix_cpu_timer_get,
-};
-
-static __init int init_posix_cpu_timers(void)
-{
-       struct k_clock process = {
-               .clock_getres   = process_cpu_clock_getres,
-               .clock_get      = process_cpu_clock_get,
-               .timer_create   = process_cpu_timer_create,
-               .nsleep         = process_cpu_nsleep,
-               .nsleep_restart = process_cpu_nsleep_restart,
-       };
-       struct k_clock thread = {
-               .clock_getres   = thread_cpu_clock_getres,
-               .clock_get      = thread_cpu_clock_get,
-               .timer_create   = thread_cpu_timer_create,
-       };
-       struct timespec ts;
-
-       posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
-       posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
-
-       cputime_to_timespec(cputime_one_jiffy, &ts);
-       onecputick = ts.tv_nsec;
-       WARN_ON(ts.tv_sec != 0);
-
-       return 0;
-}
-__initcall(init_posix_cpu_timers);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
deleted file mode 100644 (file)
index 424c2d4..0000000
+++ /dev/null
@@ -1,1121 +0,0 @@
-/*
- * linux/kernel/posix-timers.c
- *
- *
- * 2002-10-15  Posix Clocks & timers
- *                           by George Anzinger george@mvista.com
- *
- *                          Copyright (C) 2002 2003 by MontaVista Software.
- *
- * 2004-06-01  Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
- *                          Copyright (C) 2004 Boris Hu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
-
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * MontaVista Software | 1237 East Arques Avenue | Sunnyvale | CA 94085 | USA
- */
-
-/* These are all the functions necessary to implement
- * POSIX clocks & timers
- */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/mutex.h>
-
-#include <asm/uaccess.h>
-#include <linux/list.h>
-#include <linux/init.h>
-#include <linux/compiler.h>
-#include <linux/hash.h>
-#include <linux/posix-clock.h>
-#include <linux/posix-timers.h>
-#include <linux/syscalls.h>
-#include <linux/wait.h>
-#include <linux/workqueue.h>
-#include <linux/export.h>
-#include <linux/hashtable.h>
-
-/*
- * Management arrays for POSIX timers. Timers are now kept in static hash table
- * with 512 entries.
- * Timer ids are allocated by local routine, which selects proper hash head by
- * key, constructed from current->signal address and per signal struct counter.
- * This keeps timer ids unique per process, but now they can intersect between
- * processes.
- */
-
-/*
- * Slab cache from which every struct k_itimer is allocated; it is created
- * in init_posix_timers().
- */
-static struct kmem_cache *posix_timers_cache;
-
-static DEFINE_HASHTABLE(posix_timers_hashtable, 9);    /* 2^9 = 512 buckets */
-static DEFINE_SPINLOCK(hash_lock);     /* taken around hashtable insert and removal */
-
-/*
- * we assume that the new SIGEV_THREAD_ID shares no bits with the other
- * SIGEV values.  Here we put out an error if this assumption fails.
- */
-#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
-                       ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
-#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
-#endif
-
-/*
- * parisc wants ENOTSUP instead of EOPNOTSUPP
- */
-#ifndef ENOTSUP
-# define ENANOSLEEP_NOTSUP EOPNOTSUPP
-#else
-# define ENANOSLEEP_NOTSUP ENOTSUP
-#endif
-
-/*
- * The timer ID is turned into a timer address by a lookup in
- * posix_timers_hashtable (see posix_timer_by_id()).
- * Verifying a valid ID consists of:
- *
- * a) checking that the hash lookup finds a timer at all.
- * b) checking that the timer id matches the one in the timer itself.
- * c) that the timer belongs to the caller's signal struct (thread group).
- */
-
-/*
- * CLOCKs: The POSIX standard calls for a couple of clocks and allows us
- *         to implement others.  This structure defines the various
- *         clocks.
- *
- * RESOLUTION: Clock resolution is used to round up timer and interval
- *         times, NOT to report clock times, which are reported with as
- *         much resolution as the system can muster.  In some cases this
- *         resolution may depend on the underlying clock hardware and
- *         may not be quantifiable until run time, and only then is the
- *         necessary code is written.  The standard says we should say
- *         something about this issue in the documentation...
- *
- * FUNCTIONS: The CLOCKs structure defines possible functions to
- *         handle various clock functions.
- *
- *         The standard POSIX timer management code assumes the
- *         following: 1.) The k_itimer struct (sched.h) is used for
- *         the timer.  2.) The list, it_lock, it_clock, it_id and
- *         it_pid fields are not modified by timer code.
- *
- * Permissions: It is assumed that the clock_settime() function defined
- *         for each clock will take care of permission checks.  Some
- *         clocks may be settable by any user (i.e. local process
- *         clocks), others not.  Currently the only settable clock we
- *         have is CLOCK_REALTIME and its high-res counterpart, both of
- *         which we beg off on and pass to do_sys_settimeofday().
- */
-
-static struct k_clock posix_clocks[MAX_CLOCKS];
-
-/*
- * These ones are defined below.
- */
-static int common_nsleep(const clockid_t, int flags, struct timespec *t,
-                        struct timespec __user *rmtp);
-static int common_timer_create(struct k_itimer *new_timer);
-static void common_timer_get(struct k_itimer *, struct itimerspec *);
-static int common_timer_set(struct k_itimer *, int,
-                           struct itimerspec *, struct itimerspec *);
-static int common_timer_del(struct k_itimer *timer);
-
-static enum hrtimer_restart posix_timer_fn(struct hrtimer *data);
-
-static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
-
-#define lock_timer(tid, flags)                                            \
-({     struct k_itimer *__timr;                                           \
-       __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags));  \
-       __timr;                                                            \
-})
-
-/*
- * Compute the posix_timers_hashtable bucket index for timer id @nr owned
- * by signal struct @sig.
- */
-static int hash(struct signal_struct *sig, unsigned int nr)
-{
-       unsigned int key = hash32_ptr(sig) ^ nr;
-
-       return hash_32(key, HASH_BITS(posix_timers_hashtable));
-}
-
-/*
- * Walk one hash bucket and return the timer that belongs to @sig and
- * carries id @id, or NULL if the bucket holds no such timer.
- */
-static struct k_itimer *__posix_timers_find(struct hlist_head *head,
-                                           struct signal_struct *sig,
-                                           timer_t id)
-{
-       struct k_itimer *cand;
-
-       hlist_for_each_entry_rcu(cand, head, t_hash) {
-               if (cand->it_signal != sig)
-                       continue;
-               if (cand->it_id == id)
-                       return cand;
-       }
-       return NULL;
-}
-
-/*
- * Find the timer with id @id among the timers of the current task's
- * signal struct.  Returns NULL when there is none.
- */
-static struct k_itimer *posix_timer_by_id(timer_t id)
-{
-       struct signal_struct *owner = current->signal;
-
-       return __posix_timers_find(&posix_timers_hashtable[hash(owner, id)],
-                                  owner, id);
-}
-
-/*
- * Pick a free timer id for the current signal struct and hash @timer
- * under it.
- *
- * Candidate ids come from sig->posix_timer_id, which is advanced (and
- * wrapped back to 0 when it turns negative) after every attempt.
- * Returns the id that was assigned, or -EAGAIN once the counter has come
- * back to its starting value without finding an unused id.
- */
-static int posix_timer_add(struct k_itimer *timer)
-{
-       struct signal_struct *sig = current->signal;
-       int start_id = sig->posix_timer_id;
-       int ret = -ENOENT;
-
-       while (ret == -ENOENT) {
-               struct hlist_head *head;
-               int candidate;
-
-               spin_lock(&hash_lock);
-               candidate = sig->posix_timer_id;
-               head = &posix_timers_hashtable[hash(sig, candidate)];
-               if (!__posix_timers_find(head, sig, candidate)) {
-                       hlist_add_head_rcu(&timer->t_hash, head);
-                       ret = candidate;
-               }
-               /* Advance the per-signal counter for the next attempt. */
-               if (++sig->posix_timer_id < 0)
-                       sig->posix_timer_id = 0;
-               /* Loop over all possible ids completed */
-               if ((ret == -ENOENT) && (sig->posix_timer_id == start_id))
-                       ret = -EAGAIN;
-               spin_unlock(&hash_lock);
-       }
-       return ret;
-}
-
-static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
-{
-       spin_unlock_irqrestore(&timr->it_lock, flags);  /* pairs with the irqsave lock taken in __lock_timer() */
-}
-
-/*
- * Get clock_realtime: fill *tp via ktime_get_real_ts().  The clock id
- * argument is unused and the function always returns 0.
- */
-static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp)
-{
-       ktime_get_real_ts(tp);
-       return 0;
-}
-
-/*
- * Set clock_realtime: hand *tp to do_sys_settimeofday() (with NULL as its
- * second argument) and return whatever it returns.  The clock id argument
- * is unused.
- */
-static int posix_clock_realtime_set(const clockid_t which_clock,
-                                   const struct timespec *tp)
-{
-       return do_sys_settimeofday(tp, NULL);
-}
-
-/* clock_adj callback used for CLOCK_REALTIME: forwards @t to do_adjtimex(). */
-static int posix_clock_realtime_adj(const clockid_t which_clock,
-                                   struct timex *t)
-{
-       int rc = do_adjtimex(t);
-
-       return rc;
-}
-
-/*
- * Get monotonic time for posix timers: fill *tp via ktime_get_ts().
- * The clock id argument is unused; always returns 0.
- */
-static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
-{
-       ktime_get_ts(tp);
-       return 0;
-}
-
-/*
- * Get monotonic-raw time for posix timers: fill *tp via getrawmonotonic().
- * The clock id argument is unused; always returns 0.
- */
-static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
-{
-       getrawmonotonic(tp);
-       return 0;
-}
-
-
-/*
- * clock_get callback used for CLOCK_REALTIME_COARSE: stores the value of
- * current_kernel_time() in *tp.  Always returns 0.
- */
-static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp)
-{
-       struct timespec now = current_kernel_time();
-
-       *tp = now;
-       return 0;
-}
-
-/*
- * clock_get callback used for CLOCK_MONOTONIC_COARSE: stores the value of
- * get_monotonic_coarse() in *tp.  Always returns 0.
- */
-static int posix_get_monotonic_coarse(clockid_t which_clock,
-                                               struct timespec *tp)
-{
-       struct timespec now = get_monotonic_coarse();
-
-       *tp = now;
-       return 0;
-}
-
-/*
- * clock_getres callback shared by the two coarse clocks: reports
- * KTIME_LOW_RES converted to a timespec.  Always returns 0.
- */
-static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
-{
-       struct timespec res = ktime_to_timespec(KTIME_LOW_RES);
-
-       *tp = res;
-       return 0;
-}
-
-static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
-{
-       get_monotonic_boottime(tp);     /* which_clock is not consulted */
-       return 0;                       /* no failure path */
-}
-
-static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
-{
-       timekeeping_clocktai(tp);       /* which_clock is not consulted */
-       return 0;                       /* no failure path */
-}
-
-/*
- * Initialize everything, well, just everything in Posix clocks/timers ;)
- *
- * Builds one k_clock operation table per static clock id on the stack,
- * registers each of them (posix_timers_register_clock() copies the
- * structure), and creates the slab cache used for struct k_itimer.
- * Always returns 0; the cache is created with SLAB_PANIC.
- */
-static __init int init_posix_timers(void)
-{
-       struct k_clock clock_realtime = {
-               .clock_getres   = hrtimer_get_res,
-               .clock_get      = posix_clock_realtime_get,
-               .clock_set      = posix_clock_realtime_set,
-               .clock_adj      = posix_clock_realtime_adj,
-               .nsleep         = common_nsleep,
-               .nsleep_restart = hrtimer_nanosleep_restart,
-               .timer_create   = common_timer_create,
-               .timer_set      = common_timer_set,
-               .timer_get      = common_timer_get,
-               .timer_del      = common_timer_del,
-       };
-       struct k_clock clock_monotonic = {
-               .clock_getres   = hrtimer_get_res,
-               .clock_get      = posix_ktime_get_ts,
-               .nsleep         = common_nsleep,
-               .nsleep_restart = hrtimer_nanosleep_restart,
-               .timer_create   = common_timer_create,
-               .timer_set      = common_timer_set,
-               .timer_get      = common_timer_get,
-               .timer_del      = common_timer_del,
-       };
-       struct k_clock clock_monotonic_raw = {  /* read-only: no sleep or timer ops */
-               .clock_getres   = hrtimer_get_res,
-               .clock_get      = posix_get_monotonic_raw,
-       };
-       struct k_clock clock_realtime_coarse = {        /* read-only: no sleep or timer ops */
-               .clock_getres   = posix_get_coarse_res,
-               .clock_get      = posix_get_realtime_coarse,
-       };
-       struct k_clock clock_monotonic_coarse = {       /* read-only: no sleep or timer ops */
-               .clock_getres   = posix_get_coarse_res,
-               .clock_get      = posix_get_monotonic_coarse,
-       };
-       struct k_clock clock_tai = {
-               .clock_getres   = hrtimer_get_res,
-               .clock_get      = posix_get_tai,
-               .nsleep         = common_nsleep,
-               .nsleep_restart = hrtimer_nanosleep_restart,
-               .timer_create   = common_timer_create,
-               .timer_set      = common_timer_set,
-               .timer_get      = common_timer_get,
-               .timer_del      = common_timer_del,
-       };
-       struct k_clock clock_boottime = {
-               .clock_getres   = hrtimer_get_res,
-               .clock_get      = posix_get_boottime,
-               .nsleep         = common_nsleep,
-               .nsleep_restart = hrtimer_nanosleep_restart,
-               .timer_create   = common_timer_create,
-               .timer_set      = common_timer_set,
-               .timer_get      = common_timer_get,
-               .timer_del      = common_timer_del,
-       };
-
-       posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
-       posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
-       posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
-       posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
-       posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
-       posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
-       posix_timers_register_clock(CLOCK_TAI, &clock_tai);
-
-       posix_timers_cache = kmem_cache_create("posix_timers_cache",
-                                       sizeof (struct k_itimer), 0, SLAB_PANIC,
-                                       NULL);
-       return 0;
-}
-
-__initcall(init_posix_timers);
-
-/*
- * Re-arm an interval timer: forward the hrtimer past the current time of
- * its base, account the skipped periods as overruns, freeze the overrun
- * count in it_overrun_last and restart the hrtimer.  Timers with a zero
- * interval are left alone.
- */
-static void schedule_next_timer(struct k_itimer *timr)
-{
-       struct hrtimer *hrt = &timr->it.real.timer;
-       ktime_t now;
-
-       if (!timr->it.real.interval.tv64)
-               return;
-
-       now = hrt->base->get_time();
-       timr->it_overrun += (unsigned int) hrtimer_forward(hrt, now,
-                                               timr->it.real.interval);
-
-       timr->it_overrun_last = timr->it_overrun;
-       timr->it_overrun = -1;
-       ++timr->it_requeue_pending;
-       hrtimer_restart(hrt);
-}
-
-/*
- * This function is exported for use by the signal delivery code.  It is
- * called just prior to the info block being released and is handed that
- * block.  Its function is to update the overrun entry AND to restart the
- * timer.  It should only be called if the timer is to be restarted (i.e.
- * this has been flagged in the si_sys_private entry of the info block).
- *
- * To protect against the timer going away while the signal is queued,
- * the timer is only re-armed when its it_requeue_pending value still
- * equals info->si_sys_private.
- */
-void do_schedule_next_timer(struct siginfo *info)
-{
-       struct k_itimer *timr;
-       unsigned long flags;
-
-       timr = lock_timer(info->si_tid, &flags);
-       if (!timr)
-               return;
-
-       if (timr->it_requeue_pending == info->si_sys_private) {
-               /* Negative clock ids are handled by the CPU-timer code. */
-               if (timr->it_clock < 0)
-                       posix_cpu_timer_schedule(timr);
-               else
-                       schedule_next_timer(timr);
-
-               info->si_overrun += timr->it_overrun_last;
-       }
-
-       unlock_timer(timr, flags);
-}
-
-int posix_timer_event(struct k_itimer *timr, int si_private)
-{
-       struct task_struct *task;
-       int shared, ret = -1;
-       /*
-        * Queue the timer's preallocated signal (timr->sigq) to the task
-        * identified by timr->it_pid, after storing si_private in its
-        * si_sys_private field.
-        *
-        * FIXME: if ->sigq is queued we can race with
-        * dequeue_signal()->do_schedule_next_timer().
-        *
-        * If dequeue_signal() sees the "right" value of
-        * si_sys_private it calls do_schedule_next_timer().
-        * We re-queue ->sigq and drop ->it_lock().
-        * do_schedule_next_timer() locks the timer
-        * and re-schedules it while ->sigq is pending.
-        * Not really bad, but not what we want.
-        */
-       timr->sigq->info.si_sys_private = si_private;
-
-       rcu_read_lock();
-       task = pid_task(timr->it_pid, PIDTYPE_PID);
-       if (task) {
-               /* Without SIGEV_THREAD_ID the signal is sent as a shared one. */
-               shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
-               ret = send_sigqueue(timr->sigq, task, shared);
-       }
-       rcu_read_unlock();
-       /*
-        * If we failed to send the signal the timer stops.  Only a positive
-        * send_sigqueue() result yields a nonzero return; a missing task
-        * (ret stays -1) yields 0.
-        */
-       return ret > 0;
-}
-EXPORT_SYMBOL_GPL(posix_timer_event);
-
-/*
- * This function gets called when a POSIX.1b interval timer expires.  It
- * is used as a callback from the kernel internal timer.  The
- * run_timer_list code ALWAYS calls with interrupts on.
- *
- * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
- *
- * Takes timr->it_lock for the duration of the callback.  Returns
- * HRTIMER_RESTART only for an interval timer whose signal was not sent
- * (posix_timer_event() returned nonzero); otherwise HRTIMER_NORESTART.
- */
-static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
-{
-       struct k_itimer *timr;
-       unsigned long flags;
-       int si_private = 0;
-       enum hrtimer_restart ret = HRTIMER_NORESTART;
-
-       timr = container_of(timer, struct k_itimer, it.real.timer);
-       spin_lock_irqsave(&timr->it_lock, flags);
-
-       if (timr->it.real.interval.tv64 != 0)
-               si_private = ++timr->it_requeue_pending;
-
-       if (posix_timer_event(timr, si_private)) {
-               /*
-                * The signal was not sent because it is ignored (SIG_IGN);
-                * we will not get a call back to restart the timer AND
-                * it should be restarted.
-                */
-               if (timr->it.real.interval.tv64 != 0) {
-                       ktime_t now = hrtimer_cb_get_time(timer);
-
-                       /*
-                        * FIXME: What we really want, is to stop this
-                        * timer completely and restart it in case the
-                        * SIG_IGN is removed. This is a non trivial
-                        * change which involves sighand locking
-                        * (sigh !), which we don't want to do late in
-                        * the release cycle.
-                        *
-                        * For now we just let timers with an interval
-                        * less than a jiffy expire every jiffy to
-                        * avoid softirq starvation in case of SIG_IGN
-                        * and a very small interval, which would put
-                        * the timer right back on the softirq pending
-                        * list. By moving now ahead of time we trick
-                        * hrtimer_forward() to expire the timer
-                        * later, while we still maintain the overrun
-                        * accuracy, but have some inconsistency in
-                        * the timer_gettime() case. This is at least
-                        * better than a starved softirq. A more
-                        * complex fix which solves also another related
-                        * inconsistency is already in the pipeline.
-                        */
-#ifdef CONFIG_HIGH_RES_TIMERS
-                       {
-                               ktime_t kj = ktime_set(0, NSEC_PER_SEC / HZ);
-
-                               if (timr->it.real.interval.tv64 < kj.tv64)
-                                       now = ktime_add(now, kj);
-                       }
-#endif
-                       timr->it_overrun += (unsigned int)
-                               hrtimer_forward(timer, now,
-                                               timr->it.real.interval);
-                       ret = HRTIMER_RESTART;
-                       ++timr->it_requeue_pending;
-               }
-       }
-
-       unlock_timer(timr, flags);
-       return ret;
-}
-
-/*
- * Validate a sigevent and return the struct pid of the task that is to be
- * notified, or NULL if the request is invalid.
- *
- * With SIGEV_THREAD_ID the target is the task named by
- * sigev_notify_thread_id; it must exist, be in the caller's thread group,
- * and the remaining notify bits must be SIGEV_SIGNAL.  Otherwise the
- * target is the caller's group leader.  Unless the notify type is
- * SIGEV_NONE, sigev_signo must lie in 1..SIGRTMAX.
- */
-static struct pid *good_sigevent(sigevent_t * event)
-{
-       struct task_struct *rtn = current->group_leader;
-       int notify = event->sigev_notify & ~SIGEV_THREAD_ID;
-
-       if (event->sigev_notify & SIGEV_THREAD_ID) {
-               rtn = find_task_by_vpid(event->sigev_notify_thread_id);
-               if (!rtn)
-                       return NULL;
-               if (!same_thread_group(rtn, current))
-                       return NULL;
-               if (notify != SIGEV_SIGNAL)
-                       return NULL;
-       }
-
-       if (notify != SIGEV_NONE) {
-               if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
-                       return NULL;
-       }
-
-       return task_pid(rtn);
-}
-
-/*
- * posix_timers_register_clock - install the operations for a static clock id
- * @clock_id:  index into posix_clocks[]; must be below MAX_CLOCKS
- * @new_clock: operations to copy; clock_get and clock_getres are mandatory
- *
- * On a validation failure a warning is printed and nothing is registered.
- * The structure is copied by value, so @new_clock may live on the
- * caller's stack.
- */
-void posix_timers_register_clock(const clockid_t clock_id,
-                                struct k_clock *new_clock)
-{
-       if ((unsigned) clock_id >= MAX_CLOCKS) {
-               printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
-                      clock_id);
-               return;
-       }
-
-       if (new_clock->clock_get == NULL) {
-               printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
-                      clock_id);
-               return;
-       }
-
-       if (new_clock->clock_getres == NULL) {
-               printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
-                      clock_id);
-               return;
-       }
-
-       posix_clocks[clock_id] = *new_clock;
-}
-EXPORT_SYMBOL_GPL(posix_timers_register_clock);
-
-/*
- * Allocate a zeroed k_itimer from posix_timers_cache together with its
- * signal queue entry, whose siginfo is cleared.  Returns NULL if either
- * allocation fails; nothing is leaked in that case.
- */
-static struct k_itimer * alloc_posix_timer(void)
-{
-       struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
-
-       if (!tmr)
-               return NULL;
-
-       tmr->sigq = sigqueue_alloc();
-       if (unlikely(!tmr->sigq)) {
-               kmem_cache_free(posix_timers_cache, tmr);
-               return NULL;
-       }
-
-       memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
-       return tmr;
-}
-
-/*
- * RCU callback queued by release_posix_timer(): return the k_itimer that
- * embeds @head (as it.rcu) to the slab cache.
- */
-static void k_itimer_rcu_free(struct rcu_head *head)
-{
-       kmem_cache_free(posix_timers_cache,
-                       container_of(head, struct k_itimer, it.rcu));
-}
-
-/* Values for the it_id_set argument of release_posix_timer(). */
-#define IT_ID_SET      1
-#define IT_ID_NOT_SET  0
-
-/*
- * Tear down a timer: unhash it when it had been given an id
- * (@it_id_set != IT_ID_NOT_SET), drop the pid reference, free the signal
- * queue entry, and free the k_itimer itself through call_rcu().
- */
-static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
-{
-       if (it_id_set != IT_ID_NOT_SET) {
-               unsigned long irqflags;
-
-               spin_lock_irqsave(&hash_lock, irqflags);
-               hlist_del_rcu(&tmr->t_hash);
-               spin_unlock_irqrestore(&hash_lock, irqflags);
-       }
-
-       put_pid(tmr->it_pid);
-       sigqueue_free(tmr->sigq);
-       call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
-}
-
-/*
- * Map a clock id to its operation table.
- *
- * Negative ids select clock_posix_dynamic when their CLOCKFD_MASK bits
- * equal CLOCKFD, and clock_posix_cpu otherwise.  Non-negative ids index
- * posix_clocks[]; NULL is returned for ids at or beyond MAX_CLOCKS and
- * for slots without a clock_getres callback (never registered).
- */
-static struct k_clock *clockid_to_kclock(const clockid_t id)
-{
-       if (id < 0) {
-               if ((id & CLOCKFD_MASK) == CLOCKFD)
-                       return &clock_posix_dynamic;
-               return &clock_posix_cpu;
-       }
-
-       if (id >= MAX_CLOCKS)
-               return NULL;
-       if (!posix_clocks[id].clock_getres)
-               return NULL;
-
-       return &posix_clocks[id];
-}
-
-/*
- * timer_create callback of the hrtimer based clocks: initialise the
- * embedded hrtimer on the timer's clock with mode 0.  Always returns 0.
- */
-static int common_timer_create(struct k_itimer *new_timer)
-{
-       struct hrtimer *hrt = &new_timer->it.real.timer;
-
-       hrtimer_init(hrt, new_timer->it_clock, 0);
-       return 0;
-}
-
-/*
- * Create a POSIX.1b interval timer.
- *
- * @which_clock:       clock the timer is based on; must map to a k_clock
- *                     that provides ->timer_create
- * @timer_event_spec:  user sigevent describing the notification, or NULL
- *                     for the default (SIGEV_SIGNAL with SIGALRM and the
- *                     timer id as signal value)
- * @created_timer_id:  user location that receives the new timer id
- *
- * Returns 0 on success, -EINVAL for an unknown clock or a rejected
- * sigevent, -EOPNOTSUPP if the clock has no timer support, -EAGAIN if the
- * timer could not be allocated or no id is free, -EFAULT on a failed user
- * copy, or the error returned by the clock's ->timer_create.
- */
-
-SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
-               struct sigevent __user *, timer_event_spec,
-               timer_t __user *, created_timer_id)
-{
-       struct k_clock *kc = clockid_to_kclock(which_clock);
-       struct k_itimer *new_timer;
-       int error, new_timer_id;
-       sigevent_t event;
-       int it_id_set = IT_ID_NOT_SET;
-
-       if (!kc)
-               return -EINVAL;
-       if (!kc->timer_create)
-               return -EOPNOTSUPP;
-
-       new_timer = alloc_posix_timer();
-       if (unlikely(!new_timer))
-               return -EAGAIN;
-
-       spin_lock_init(&new_timer->it_lock);
-       new_timer_id = posix_timer_add(new_timer);
-       if (new_timer_id < 0) {
-               error = new_timer_id;
-               goto out;
-       }
-
-       /* From here on the timer is hashed and must be unhashed on error. */
-       it_id_set = IT_ID_SET;
-       new_timer->it_id = (timer_t) new_timer_id;
-       new_timer->it_clock = which_clock;
-       new_timer->it_overrun = -1;
-
-       if (timer_event_spec) {
-               if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
-                       error = -EFAULT;
-                       goto out;
-               }
-               rcu_read_lock();
-               new_timer->it_pid = get_pid(good_sigevent(&event));
-               rcu_read_unlock();
-               if (!new_timer->it_pid) {
-                       error = -EINVAL;
-                       goto out;
-               }
-       } else {
-               /*
-                * sigev_value is a union: storing only sival_int would leave
-                * its remaining bytes as uninitialized stack, and the whole
-                * union is copied into the queued siginfo's si_value below.
-                * Clear it first so no stack contents leak.
-                */
-               memset(&event.sigev_value, 0, sizeof(event.sigev_value));
-               event.sigev_notify = SIGEV_SIGNAL;
-               event.sigev_signo = SIGALRM;
-               event.sigev_value.sival_int = new_timer->it_id;
-               new_timer->it_pid = get_pid(task_tgid(current));
-       }
-
-       new_timer->it_sigev_notify     = event.sigev_notify;
-       new_timer->sigq->info.si_signo = event.sigev_signo;
-       new_timer->sigq->info.si_value = event.sigev_value;
-       new_timer->sigq->info.si_tid   = new_timer->it_id;
-       new_timer->sigq->info.si_code  = SI_TIMER;
-
-       if (copy_to_user(created_timer_id,
-                        &new_timer_id, sizeof (new_timer_id))) {
-               error = -EFAULT;
-               goto out;
-       }
-
-       error = kc->timer_create(new_timer);
-       if (error)
-               goto out;
-
-       /* Publish the timer: setting it_signal makes lock_timer() accept it. */
-       spin_lock_irq(&current->sighand->siglock);
-       new_timer->it_signal = current->signal;
-       list_add(&new_timer->list, &current->signal->posix_timers);
-       spin_unlock_irq(&current->sighand->siglock);
-
-       return 0;
-       /*
-        * In the case of the timer belonging to another task, after
-        * the task is unlocked, the timer is owned by the other task
-        * and may cease to exist at any time.  Don't use or modify
-        * new_timer after the unlock call.
-        */
-out:
-       release_posix_timer(new_timer, it_id_set);
-       return error;
-}
-
-/*
- * Locking issues: We need to protect the result of the id look up until
- * we get the timer locked down so it is not deleted under us.  The
- * lookup runs inside an RCU read-side section; removal unhashes the timer
- * with hlist_del_rcu() under hash_lock and frees it through call_rcu(),
- * which bridges the find to the timer lock.  Once it_lock is held,
- * it_signal is rechecked, because timer_delete() clears it before
- * releasing the timer.
- *
- * Returns the timer with it_lock held and the irq state saved in *flags,
- * or NULL if the id is out of range, unknown, or not owned by the caller.
- */
-static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
-{
-       struct k_itimer *timr;
-
-       /*
-        * timer_t could be any type >= int and we want to make sure any
-        * @timer_id outside positive int range fails lookup.
-        */
-       if ((unsigned long long)timer_id > INT_MAX)
-               return NULL;
-
-       rcu_read_lock();
-       timr = posix_timer_by_id(timer_id);
-       if (timr) {
-               spin_lock_irqsave(&timr->it_lock, *flags);
-               if (timr->it_signal == current->signal) {
-                       rcu_read_unlock();
-                       return timr;
-               }
-               spin_unlock_irqrestore(&timr->it_lock, *flags);
-       }
-       rcu_read_unlock();
-
-       return NULL;
-}
-
-/*
- * Get the time remaining on a POSIX.1b interval timer.  This function
- * is ALWAYS called with spin_lock_irq on the timer, thus it must not
- * mess with irq.
- *
- * We have a couple of messes to clean up here.  First there is the case
- * of a timer that has a requeue pending.  These timers should appear to
- * be in the timer list with an expiry as if we were to requeue them
- * now.
- *
- * The second issue is the SIGEV_NONE timer which may be active but is
- * not really ever put in the timer list (to save system resources).
- * This timer may be expired, and if so, we will do it here.  Otherwise
- * it is the same as a requeue pending timer WRT to what we should
- * report.
- *
- * @timr:        the timer to query
- * @cur_setting: cleared first, then filled with the interval and the
- *               remaining time; it stays all zero for an inactive
- *               one-shot timer that is not SIGEV_NONE
- */
-static void
-common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
-{
-       ktime_t now, remaining, iv;
-       struct hrtimer *timer = &timr->it.real.timer;
-
-       memset(cur_setting, 0, sizeof(struct itimerspec));
-
-       iv = timr->it.real.interval;
-
-       /* interval timer ? */
-       if (iv.tv64)
-               cur_setting->it_interval = ktime_to_timespec(iv);
-       else if (!hrtimer_active(timer) &&
-                (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
-               return;
-
-       now = timer->base->get_time();
-
-       /*
-        * When a requeue is pending or this is a SIGEV_NONE
-        * timer move the expiry time forward by intervals, so
-        * expiry is > now.
-        */
-       if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
-           (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
-               timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
-
-       remaining = ktime_sub(hrtimer_get_expires(timer), now);
-       /* Return 0 only, when the timer is expired and not pending */
-       if (remaining.tv64 <= 0) {
-               /*
-                * A single shot SIGEV_NONE timer must return 0, when
-                * it is expired !  Every other timer reports 1ns here,
-                * so it does not read as disarmed.
-                */
-               if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
-                       cur_setting->it_value.tv_nsec = 1;
-       } else
-               cur_setting->it_value = ktime_to_timespec(remaining);
-}
-
-/*
- * Get the time remaining on a POSIX.1b interval timer.
- *
- * The setting is read through the clock's ->timer_get while the timer is
- * locked and copied to user space after the lock has been dropped.
- * Returns 0, -EINVAL for an unknown timer id (or a clock without
- * ->timer_get), or -EFAULT if the copy to @setting fails.
- */
-SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
-               struct itimerspec __user *, setting)
-{
-       struct itimerspec cur_setting;
-       unsigned long flags;
-       struct k_itimer *timr;
-       struct k_clock *kc;
-       int ret = 0;
-
-       timr = lock_timer(timer_id, &flags);
-       if (!timr)
-               return -EINVAL;
-
-       kc = clockid_to_kclock(timr->it_clock);
-       if (WARN_ON_ONCE(!kc || !kc->timer_get))
-               ret = -EINVAL;
-       else
-               kc->timer_get(timr, &cur_setting);
-
-       unlock_timer(timr, flags);
-
-       if (ret)
-               return ret;
-       if (copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
-               return -EFAULT;
-
-       return 0;
-}
-
-/*
- * Get the number of overruns of a POSIX.1b interval timer.  This is the
- * overrun count of the timer signal delivered last; overruns for the next
- * expiry are accumulated separately.  The count is frozen when the signal
- * is delivered, either at notify time (if the info block is not queued)
- * or at actual delivery time (through the call back to
- * do_schedule_next_timer()), so all that is left to do here is to pick up
- * the frozen value, it_overrun_last, under the timer lock.
- *
- * Returns that count, or -EINVAL for an unknown timer id.
- */
-SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
-{
-       unsigned long flags;
-       struct k_itimer *timr;
-       int frozen;
-
-       timr = lock_timer(timer_id, &flags);
-       if (!timr)
-               return -EINVAL;
-
-       frozen = timr->it_overrun_last;
-       unlock_timer(timr, flags);
-
-       return frozen;
-}
-
-/*
- * Set a POSIX.1b interval timer (hrtimer based clocks).
- *
- * @timr:        the timer, locked by the caller
- * @flags:       TIMER_ABSTIME selects an absolute expiry, otherwise relative
- * @new_setting: new value and interval; a zero it_value only disarms
- * @old_setting: if non-NULL, receives the previous setting
- *
- * Returns 0 on success, or TIMER_RETRY when the hrtimer could not be
- * cancelled and the caller has to drop the lock and try again.
- */
-/* timr->it_lock is taken. */
-static int
-common_timer_set(struct k_itimer *timr, int flags,
-                struct itimerspec *new_setting, struct itimerspec *old_setting)
-{
-       struct hrtimer *timer = &timr->it.real.timer;
-       enum hrtimer_mode mode;
-
-       if (old_setting)
-               common_timer_get(timr, old_setting);
-
-       /* disable the timer */
-       timr->it.real.interval.tv64 = 0;
-       /*
-        * careful here.  If smp we could be in the "fire" routine which will
-        * be spinning as we hold the lock.  But this is ONLY an SMP issue.
-        */
-       if (hrtimer_try_to_cancel(timer) < 0)
-               return TIMER_RETRY;
-
-       timr->it_requeue_pending = (timr->it_requeue_pending + 2) & 
-               ~REQUEUE_PENDING;
-       timr->it_overrun_last = 0;
-
-       /* switch off the timer when it_value is zero */
-       if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
-               return 0;
-
-       mode = flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
-       hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
-       timr->it.real.timer.function = posix_timer_fn;
-
-       hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value));
-
-       /* Convert interval */
-       timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
-
-       /* SIGEV_NONE timers are not queued ! See common_timer_get */
-       if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
-               /* Setup correct expiry time for relative timers */
-               if (mode == HRTIMER_MODE_REL) {
-                       hrtimer_add_expires(timer, timer->base->get_time());
-               }
-               return 0;
-       }
-
-       hrtimer_start_expires(timer, mode);
-       return 0;
-}
-
-/*
- * Set a POSIX.1b interval timer.
- *
- * Copies and validates the new setting, then applies it through the
- * clock's ->timer_set with the timer locked.  When that returns
- * TIMER_RETRY the timer is unlocked and the operation is repeated.
- *
- * Returns 0 on success, -EINVAL for a NULL or invalid new setting, an
- * unknown timer id, or a clock without ->timer_set, -EFAULT on a failed
- * user copy, or another error returned by ->timer_set.
- */
-SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
-               const struct itimerspec __user *, new_setting,
-               struct itimerspec __user *, old_setting)
-{
-       struct k_itimer *timr;
-       struct itimerspec new_spec, old_spec;
-       int error = 0;
-       unsigned long flag;
-       struct itimerspec *rtn = old_setting ? &old_spec : NULL;
-       struct k_clock *kc;
-
-       if (!new_setting)
-               return -EINVAL;
-
-       if (copy_from_user(&new_spec, new_setting, sizeof (new_spec)))
-               return -EFAULT;
-
-       if (!timespec_valid(&new_spec.it_interval) ||
-           !timespec_valid(&new_spec.it_value))
-               return -EINVAL;
-retry:
-       timr = lock_timer(timer_id, &flag);
-       if (!timr)
-               return -EINVAL;
-
-       kc = clockid_to_kclock(timr->it_clock);
-       if (WARN_ON_ONCE(!kc || !kc->timer_set))
-               error = -EINVAL;
-       else
-               error = kc->timer_set(timr, flags, &new_spec, rtn);
-
-       unlock_timer(timr, flag);
-       if (error == TIMER_RETRY) {
-               rtn = NULL;     /* the old setting is not requested again on the retry pass */
-               goto retry;
-       }
-
-       if (old_setting && !error &&
-           copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
-               error = -EFAULT;
-
-       return error;
-}
-
-static int common_timer_del(struct k_itimer *timer)
-{
-       timer->it.real.interval.tv64 = 0;
-
-       if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0)
-               return TIMER_RETRY;
-       return 0;
-}
-
-static inline int timer_delete_hook(struct k_itimer *timer)
-{
-       struct k_clock *kc = clockid_to_kclock(timer->it_clock);
-
-       if (WARN_ON_ONCE(!kc || !kc->timer_del))
-               return -EINVAL;
-       return kc->timer_del(timer);
-}
-
-/* Delete a POSIX.1b interval timer. */
-SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
-{
-       struct k_itimer *timer;
-       unsigned long flags;
-
-retry_delete:
-       timer = lock_timer(timer_id, &flags);
-       if (!timer)
-               return -EINVAL;
-
-       if (timer_delete_hook(timer) == TIMER_RETRY) {
-               unlock_timer(timer, flags);
-               goto retry_delete;
-       }
-
-       spin_lock(&current->sighand->siglock);
-       list_del(&timer->list);
-       spin_unlock(&current->sighand->siglock);
-       /*
-        * This keeps any tasks waiting on the spin lock from thinking
-        * they got something (see the lock code above).
-        */
-       timer->it_signal = NULL;
-
-       unlock_timer(timer, flags);
-       release_posix_timer(timer, IT_ID_SET);
-       return 0;
-}
-
-/*
- * return timer owned by the process, used by exit_itimers
- */
-static void itimer_delete(struct k_itimer *timer)
-{
-       unsigned long flags;
-
-retry_delete:
-       spin_lock_irqsave(&timer->it_lock, flags);
-
-       if (timer_delete_hook(timer) == TIMER_RETRY) {
-               unlock_timer(timer, flags);
-               goto retry_delete;
-       }
-       list_del(&timer->list);
-       /*
-        * This keeps any tasks waiting on the spin lock from thinking
-        * they got something (see the lock code above).
-        */
-       timer->it_signal = NULL;
-
-       unlock_timer(timer, flags);
-       release_posix_timer(timer, IT_ID_SET);
-}
-
-/*
- * This is called by do_exit or de_thread, only when there are no more
- * references to the shared signal_struct.
- */
-void exit_itimers(struct signal_struct *sig)
-{
-       struct k_itimer *tmr;
-
-       while (!list_empty(&sig->posix_timers)) {
-               tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
-               itimer_delete(tmr);
-       }
-}
-
-SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
-               const struct timespec __user *, tp)
-{
-       struct k_clock *kc = clockid_to_kclock(which_clock);
-       struct timespec new_tp;
-
-       if (!kc || !kc->clock_set)
-               return -EINVAL;
-
-       if (copy_from_user(&new_tp, tp, sizeof (*tp)))
-               return -EFAULT;
-
-       return kc->clock_set(which_clock, &new_tp);
-}
-
-SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
-               struct timespec __user *,tp)
-{
-       struct k_clock *kc = clockid_to_kclock(which_clock);
-       struct timespec kernel_tp;
-       int error;
-
-       if (!kc)
-               return -EINVAL;
-
-       error = kc->clock_get(which_clock, &kernel_tp);
-
-       if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
-               error = -EFAULT;
-
-       return error;
-}
-
-SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
-               struct timex __user *, utx)
-{
-       struct k_clock *kc = clockid_to_kclock(which_clock);
-       struct timex ktx;
-       int err;
-
-       if (!kc)
-               return -EINVAL;
-       if (!kc->clock_adj)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&ktx, utx, sizeof(ktx)))
-               return -EFAULT;
-
-       err = kc->clock_adj(which_clock, &ktx);
-
-       if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
-               return -EFAULT;
-
-       return err;
-}
-
-SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
-               struct timespec __user *, tp)
-{
-       struct k_clock *kc = clockid_to_kclock(which_clock);
-       struct timespec rtn_tp;
-       int error;
-
-       if (!kc)
-               return -EINVAL;
-
-       error = kc->clock_getres(which_clock, &rtn_tp);
-
-       if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
-               error = -EFAULT;
-
-       return error;
-}
-
-/*
- * nanosleep for monotonic and realtime clocks
- */
-static int common_nsleep(const clockid_t which_clock, int flags,
-                        struct timespec *tsave, struct timespec __user *rmtp)
-{
-       return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
-                                HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
-                                which_clock);
-}
-
-SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
-               const struct timespec __user *, rqtp,
-               struct timespec __user *, rmtp)
-{
-       struct k_clock *kc = clockid_to_kclock(which_clock);
-       struct timespec t;
-
-       if (!kc)
-               return -EINVAL;
-       if (!kc->nsleep)
-               return -ENANOSLEEP_NOTSUP;
-
-       if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
-               return -EFAULT;
-
-       if (!timespec_valid(&t))
-               return -EINVAL;
-
-       return kc->nsleep(which_clock, flags, &t, rmtp);
-}
-
-/*
- * This will restart clock_nanosleep. This is required only by
- * compat_clock_nanosleep_restart for now.
- */
-long clock_nanosleep_restart(struct restart_block *restart_block)
-{
-       clockid_t which_clock = restart_block->nanosleep.clockid;
-       struct k_clock *kc = clockid_to_kclock(which_clock);
-
-       if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
-               return -EINVAL;
-
-       return kc->nsleep_restart(restart_block);
-}
diff --git a/kernel/time.c b/kernel/time.c
deleted file mode 100644 (file)
index 7c7964c..0000000
+++ /dev/null
@@ -1,714 +0,0 @@
-/*
- *  linux/kernel/time.c
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- *  This file contains the interface functions for the various
- *  time related system calls: time, stime, gettimeofday, settimeofday,
- *                            adjtime
- */
-/*
- * Modification history kernel/time.c
- *
- * 1993-09-02    Philip Gladstone
- *      Created file with time related functions from sched/core.c and adjtimex()
- * 1993-10-08    Torsten Duwe
- *      adjtime interface update and CMOS clock write code
- * 1995-08-13    Torsten Duwe
- *      kernel PLL updated to 1994-12-13 specs (rfc-1589)
- * 1999-01-16    Ulrich Windl
- *     Introduced error checking for many cases in adjtimex().
- *     Updated NTP code according to technical memorandum Jan '96
- *     "A Kernel Model for Precision Timekeeping" by Dave Mills
- *     Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
- *     (Even though the technical memorandum forbids it)
- * 2004-07-14   Christoph Lameter
- *     Added getnstimeofday to allow the posix timer functions to return
- *     with nanosecond accuracy
- */
-
-#include <linux/export.h>
-#include <linux/timex.h>
-#include <linux/capability.h>
-#include <linux/timekeeper_internal.h>
-#include <linux/errno.h>
-#include <linux/syscalls.h>
-#include <linux/security.h>
-#include <linux/fs.h>
-#include <linux/math64.h>
-#include <linux/ptrace.h>
-
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
-
-#include "timeconst.h"
-
-/*
- * The timezone where the local system is located.  Used as a default by some
- * programs who obtain this value by using gettimeofday.
- */
-struct timezone sys_tz;
-
-EXPORT_SYMBOL(sys_tz);
-
-#ifdef __ARCH_WANT_SYS_TIME
-
-/*
- * sys_time() can be implemented in user-level using
- * sys_gettimeofday().  Is this for backwards compatibility?  If so,
- * why not move it into the appropriate arch directory (for those
- * architectures that need it).
- */
-SYSCALL_DEFINE1(time, time_t __user *, tloc)
-{
-       time_t i = get_seconds();
-
-       if (tloc) {
-               if (put_user(i,tloc))
-                       return -EFAULT;
-       }
-       force_successful_syscall_return();
-       return i;
-}
-
-/*
- * sys_stime() can be implemented in user-level using
- * sys_settimeofday().  Is this for backwards compatibility?  If so,
- * why not move it into the appropriate arch directory (for those
- * architectures that need it).
- */
-
-SYSCALL_DEFINE1(stime, time_t __user *, tptr)
-{
-       struct timespec tv;
-       int err;
-
-       if (get_user(tv.tv_sec, tptr))
-               return -EFAULT;
-
-       tv.tv_nsec = 0;
-
-       err = security_settime(&tv, NULL);
-       if (err)
-               return err;
-
-       do_settimeofday(&tv);
-       return 0;
-}
-
-#endif /* __ARCH_WANT_SYS_TIME */
-
-SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
-               struct timezone __user *, tz)
-{
-       if (likely(tv != NULL)) {
-               struct timeval ktv;
-               do_gettimeofday(&ktv);
-               if (copy_to_user(tv, &ktv, sizeof(ktv)))
-                       return -EFAULT;
-       }
-       if (unlikely(tz != NULL)) {
-               if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-                       return -EFAULT;
-       }
-       return 0;
-}
-
-/*
- * Indicates if there is an offset between the system clock and the hardware
- * clock/persistent clock/rtc.
- */
-int persistent_clock_is_local;
-
-/*
- * Adjust the time obtained from the CMOS to be UTC time instead of
- * local time.
- *
- * This is ugly, but preferable to the alternatives.  Otherwise we
- * would either need to write a program to do it in /etc/rc (and risk
- * confusion if the program gets run more than once; it would also be
- * hard to make the program warp the clock precisely n hours)  or
- * compile in the timezone information into the kernel.  Bad, bad....
- *
- *                                             - TYT, 1992-01-01
- *
- * The best thing to do is to keep the CMOS clock in universal time (UTC)
- * as real UNIX machines always do it. This avoids all headaches about
- * daylight saving times and warping kernel clocks.
- */
-static inline void warp_clock(void)
-{
-       if (sys_tz.tz_minuteswest != 0) {
-               struct timespec adjust;
-
-               persistent_clock_is_local = 1;
-               adjust.tv_sec = sys_tz.tz_minuteswest * 60;
-               adjust.tv_nsec = 0;
-               timekeeping_inject_offset(&adjust);
-       }
-}
-
-/*
- * In case for some reason the CMOS clock has not already been running
- * in UTC, but in some local time: The first time we set the timezone,
- * we will warp the clock so that it is ticking UTC time instead of
- * local time. Presumably, if someone is setting the timezone then we
- * are running in an environment where the programs understand about
- * timezones. This should be done at boot time in the /etc/rc script,
- * as soon as possible, so that the clock can be set right. Otherwise,
- * various programs will get confused when the clock gets warped.
- */
-
-int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
-{
-       static int firsttime = 1;
-       int error = 0;
-
-       if (tv && !timespec_valid(tv))
-               return -EINVAL;
-
-       error = security_settime(tv, tz);
-       if (error)
-               return error;
-
-       if (tz) {
-               sys_tz = *tz;
-               update_vsyscall_tz();
-               if (firsttime) {
-                       firsttime = 0;
-                       if (!tv)
-                               warp_clock();
-               }
-       }
-       if (tv)
-               return do_settimeofday(tv);
-       return 0;
-}
-
-SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv,
-               struct timezone __user *, tz)
-{
-       struct timeval user_tv;
-       struct timespec new_ts;
-       struct timezone new_tz;
-
-       if (tv) {
-               if (copy_from_user(&user_tv, tv, sizeof(*tv)))
-                       return -EFAULT;
-               new_ts.tv_sec = user_tv.tv_sec;
-               new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
-       }
-       if (tz) {
-               if (copy_from_user(&new_tz, tz, sizeof(*tz)))
-                       return -EFAULT;
-       }
-
-       return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
-}
-
-SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
-{
-       struct timex txc;               /* Local copy of parameter */
-       int ret;
-
-       /* Copy the user data space into the kernel copy
-        * structure. But bear in mind that the structures
-        * may change
-        */
-       if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
-               return -EFAULT;
-       ret = do_adjtimex(&txc);
-       return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
-}
-
-/**
- * current_fs_time - Return FS time
- * @sb: Superblock.
- *
- * Return the current time truncated to the time granularity supported by
- * the fs.
- */
-struct timespec current_fs_time(struct super_block *sb)
-{
-       struct timespec now = current_kernel_time();
-       return timespec_trunc(now, sb->s_time_gran);
-}
-EXPORT_SYMBOL(current_fs_time);
-
-/*
- * Convert jiffies to milliseconds and back.
- *
- * Avoid unnecessary multiplications/divisions in the
- * two most common HZ cases:
- */
-unsigned int jiffies_to_msecs(const unsigned long j)
-{
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
-       return (MSEC_PER_SEC / HZ) * j;
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
-       return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
-#else
-# if BITS_PER_LONG == 32
-       return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
-# else
-       return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
-# endif
-#endif
-}
-EXPORT_SYMBOL(jiffies_to_msecs);
-
-unsigned int jiffies_to_usecs(const unsigned long j)
-{
-#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
-       return (USEC_PER_SEC / HZ) * j;
-#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
-       return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC);
-#else
-# if BITS_PER_LONG == 32
-       return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32;
-# else
-       return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN;
-# endif
-#endif
-}
-EXPORT_SYMBOL(jiffies_to_usecs);
-
-/**
- * timespec_trunc - Truncate timespec to a granularity
- * @t: Timespec
- * @gran: Granularity in ns.
- *
- * Truncate a timespec to a granularity. gran must be smaller than a second.
- * Always rounds down.
- *
- * This function should be only used for timestamps returned by
- * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because
- * it doesn't handle the better resolution of the latter.
- */
-struct timespec timespec_trunc(struct timespec t, unsigned gran)
-{
-       /*
-        * Division is pretty slow so avoid it for common cases.
-        * Currently current_kernel_time() never returns better than
-        * jiffies resolution. Exploit that.
-        */
-       if (gran <= jiffies_to_usecs(1) * 1000) {
-               /* nothing */
-       } else if (gran == 1000000000) {
-               t.tv_nsec = 0;
-       } else {
-               t.tv_nsec -= t.tv_nsec % gran;
-       }
-       return t;
-}
-EXPORT_SYMBOL(timespec_trunc);
-
-/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
- * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
- * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
- *
- * [For the Julian calendar (which was used in Russia before 1917,
- * Britain & colonies before 1752, anywhere else before 1582,
- * and is still in use by some communities) leave out the
- * -year/100+year/400 terms, and add 10.]
- *
- * This algorithm was first published by Gauss (I think).
- *
- * WARNING: this function will overflow on 2106-02-07 06:28:16 on
- * machines where long is 32-bit! (However, as time_t is signed, we
- * will already get problems at other places on 2038-01-19 03:14:08)
- */
-unsigned long
-mktime(const unsigned int year0, const unsigned int mon0,
-       const unsigned int day, const unsigned int hour,
-       const unsigned int min, const unsigned int sec)
-{
-       unsigned int mon = mon0, year = year0;
-
-       /* 1..12 -> 11,12,1..10 */
-       if (0 >= (int) (mon -= 2)) {
-               mon += 12;      /* Puts Feb last since it has leap day */
-               year -= 1;
-       }
-
-       return ((((unsigned long)
-                 (year/4 - year/100 + year/400 + 367*mon/12 + day) +
-                 year*365 - 719499
-           )*24 + hour /* now have hours */
-         )*60 + min /* now have minutes */
-       )*60 + sec; /* finally seconds */
-}
-
-EXPORT_SYMBOL(mktime);
-
-/**
- * set_normalized_timespec - set timespec sec and nsec parts and normalize
- *
- * @ts:                pointer to timespec variable to be set
- * @sec:       seconds to set
- * @nsec:      nanoseconds to set
- *
- * Set seconds and nanoseconds field of a timespec variable and
- * normalize to the timespec storage format
- *
- * Note: The tv_nsec part is always in the range of
- *     0 <= tv_nsec < NSEC_PER_SEC
- * For negative values only the tv_sec field is negative !
- */
-void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
-{
-       while (nsec >= NSEC_PER_SEC) {
-               /*
-                * The following asm() prevents the compiler from
-                * optimising this loop into a modulo operation. See
-                * also __iter_div_u64_rem() in include/linux/time.h
-                */
-               asm("" : "+rm"(nsec));
-               nsec -= NSEC_PER_SEC;
-               ++sec;
-       }
-       while (nsec < 0) {
-               asm("" : "+rm"(nsec));
-               nsec += NSEC_PER_SEC;
-               --sec;
-       }
-       ts->tv_sec = sec;
-       ts->tv_nsec = nsec;
-}
-EXPORT_SYMBOL(set_normalized_timespec);
-
-/**
- * ns_to_timespec - Convert nanoseconds to timespec
- * @nsec:       the nanoseconds value to be converted
- *
- * Returns the timespec representation of the nsec parameter.
- */
-struct timespec ns_to_timespec(const s64 nsec)
-{
-       struct timespec ts;
-       s32 rem;
-
-       if (!nsec)
-               return (struct timespec) {0, 0};
-
-       ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
-       if (unlikely(rem < 0)) {
-               ts.tv_sec--;
-               rem += NSEC_PER_SEC;
-       }
-       ts.tv_nsec = rem;
-
-       return ts;
-}
-EXPORT_SYMBOL(ns_to_timespec);
-
-/**
- * ns_to_timeval - Convert nanoseconds to timeval
- * @nsec:       the nanoseconds value to be converted
- *
- * Returns the timeval representation of the nsec parameter.
- */
-struct timeval ns_to_timeval(const s64 nsec)
-{
-       struct timespec ts = ns_to_timespec(nsec);
-       struct timeval tv;
-
-       tv.tv_sec = ts.tv_sec;
-       tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000;
-
-       return tv;
-}
-EXPORT_SYMBOL(ns_to_timeval);
-
-/*
- * When we convert to jiffies then we interpret incoming values
- * the following way:
- *
- * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
- *
- * - 'too large' values [that would result in larger than
- *   MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
- *
- * - all other values are converted to jiffies by either multiplying
- *   the input value by a factor or dividing it with a factor
- *
- * We must also be careful about 32-bit overflows.
- */
-unsigned long msecs_to_jiffies(const unsigned int m)
-{
-       /*
-        * Negative value, means infinite timeout:
-        */
-       if ((int)m < 0)
-               return MAX_JIFFY_OFFSET;
-
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
-       /*
-        * HZ is equal to or smaller than 1000, and 1000 is a nice
-        * round multiple of HZ, divide with the factor between them,
-        * but round upwards:
-        */
-       return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
-       /*
-        * HZ is larger than 1000, and HZ is a nice round multiple of
-        * 1000 - simply multiply with the factor between them.
-        *
-        * But first make sure the multiplication result cannot
-        * overflow:
-        */
-       if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
-               return MAX_JIFFY_OFFSET;
-
-       return m * (HZ / MSEC_PER_SEC);
-#else
-       /*
-        * Generic case - multiply, round and divide. But first
-        * check that if we are doing a net multiplication, that
-        * we wouldn't overflow:
-        */
-       if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
-               return MAX_JIFFY_OFFSET;
-
-       return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
-               >> MSEC_TO_HZ_SHR32;
-#endif
-}
-EXPORT_SYMBOL(msecs_to_jiffies);
-
-unsigned long usecs_to_jiffies(const unsigned int u)
-{
-       if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
-               return MAX_JIFFY_OFFSET;
-#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
-       return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
-#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
-       return u * (HZ / USEC_PER_SEC);
-#else
-       return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
-               >> USEC_TO_HZ_SHR32;
-#endif
-}
-EXPORT_SYMBOL(usecs_to_jiffies);
-
-/*
- * The TICK_NSEC - 1 rounds up the value to the next resolution.  Note
- * that a remainder subtract here would not do the right thing as the
- * resolution values don't fall on second boundries.  I.e. the line:
- * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
- *
- * Rather, we just shift the bits off the right.
- *
- * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
- * value to a scaled second value.
- */
-unsigned long
-timespec_to_jiffies(const struct timespec *value)
-{
-       unsigned long sec = value->tv_sec;
-       long nsec = value->tv_nsec + TICK_NSEC - 1;
-
-       if (sec >= MAX_SEC_IN_JIFFIES){
-               sec = MAX_SEC_IN_JIFFIES;
-               nsec = 0;
-       }
-       return (((u64)sec * SEC_CONVERSION) +
-               (((u64)nsec * NSEC_CONVERSION) >>
-                (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
-
-}
-EXPORT_SYMBOL(timespec_to_jiffies);
-
-void
-jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
-{
-       /*
-        * Convert jiffies to nanoseconds and separate with
-        * one divide.
-        */
-       u32 rem;
-       value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
-                                   NSEC_PER_SEC, &rem);
-       value->tv_nsec = rem;
-}
-EXPORT_SYMBOL(jiffies_to_timespec);
-
-/* Same for "timeval"
- *
- * Well, almost.  The problem here is that the real system resolution is
- * in nanoseconds and the value being converted is in micro seconds.
- * Also for some machines (those that use HZ = 1024, in-particular),
- * there is a LARGE error in the tick size in microseconds.
-
- * The solution we use is to do the rounding AFTER we convert the
- * microsecond part.  Thus the USEC_ROUND, the bits to be shifted off.
- * Instruction wise, this should cost only an additional add with carry
- * instruction above the way it was done above.
- */
-unsigned long
-timeval_to_jiffies(const struct timeval *value)
-{
-       unsigned long sec = value->tv_sec;
-       long usec = value->tv_usec;
-
-       if (sec >= MAX_SEC_IN_JIFFIES){
-               sec = MAX_SEC_IN_JIFFIES;
-               usec = 0;
-       }
-       return (((u64)sec * SEC_CONVERSION) +
-               (((u64)usec * USEC_CONVERSION + USEC_ROUND) >>
-                (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
-}
-EXPORT_SYMBOL(timeval_to_jiffies);
-
-void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
-{
-       /*
-        * Convert jiffies to nanoseconds and separate with
-        * one divide.
-        */
-       u32 rem;
-
-       value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
-                                   NSEC_PER_SEC, &rem);
-       value->tv_usec = rem / NSEC_PER_USEC;
-}
-EXPORT_SYMBOL(jiffies_to_timeval);
-
-/*
- * Convert jiffies/jiffies_64 to clock_t and back.
- */
-clock_t jiffies_to_clock_t(unsigned long x)
-{
-#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
-# if HZ < USER_HZ
-       return x * (USER_HZ / HZ);
-# else
-       return x / (HZ / USER_HZ);
-# endif
-#else
-       return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ);
-#endif
-}
-EXPORT_SYMBOL(jiffies_to_clock_t);
-
-unsigned long clock_t_to_jiffies(unsigned long x)
-{
-#if (HZ % USER_HZ)==0
-       if (x >= ~0UL / (HZ / USER_HZ))
-               return ~0UL;
-       return x * (HZ / USER_HZ);
-#else
-       /* Don't worry about loss of precision here .. */
-       if (x >= ~0UL / HZ * USER_HZ)
-               return ~0UL;
-
-       /* .. but do try to contain it here */
-       return div_u64((u64)x * HZ, USER_HZ);
-#endif
-}
-EXPORT_SYMBOL(clock_t_to_jiffies);
-
-u64 jiffies_64_to_clock_t(u64 x)
-{
-#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
-# if HZ < USER_HZ
-       x = div_u64(x * USER_HZ, HZ);
-# elif HZ > USER_HZ
-       x = div_u64(x, HZ / USER_HZ);
-# else
-       /* Nothing to do */
-# endif
-#else
-       /*
-        * There are better ways that don't overflow early,
-        * but even this doesn't overflow in hundreds of years
-        * in 64 bits, so..
-        */
-       x = div_u64(x * TICK_NSEC, (NSEC_PER_SEC / USER_HZ));
-#endif
-       return x;
-}
-EXPORT_SYMBOL(jiffies_64_to_clock_t);
-
-u64 nsec_to_clock_t(u64 x)
-{
-#if (NSEC_PER_SEC % USER_HZ) == 0
-       return div_u64(x, NSEC_PER_SEC / USER_HZ);
-#elif (USER_HZ % 512) == 0
-       return div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512);
-#else
-       /*
-         * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
-         * overflow after 64.99 years.
-         * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
-         */
-       return div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ);
-#endif
-}
-
-/**
- * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
- *
- * @n: nsecs in u64
- *
- * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
- * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
- * for scheduler, not for use in device drivers to calculate timeout value.
- *
- * note:
- *   NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
- *   ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
- */
-u64 nsecs_to_jiffies64(u64 n)
-{
-#if (NSEC_PER_SEC % HZ) == 0
-       /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
-       return div_u64(n, NSEC_PER_SEC / HZ);
-#elif (HZ % 512) == 0
-       /* overflow after 292 years if HZ = 1024 */
-       return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
-#else
-       /*
-        * Generic case - optimized for cases where HZ is a multiple of 3.
-        * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
-        */
-       return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
-#endif
-}
-
-/**
- * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
- *
- * @n: nsecs in u64
- *
- * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
- * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
- * for scheduler, not for use in device drivers to calculate timeout value.
- *
- * note:
- *   NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
- *   ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
- */
-unsigned long nsecs_to_jiffies(u64 n)
-{
-       return (unsigned long)nsecs_to_jiffies64(n);
-}
-
-/*
- * Add two timespec values and do a safety check for overflow.
- * It's assumed that both values are valid (>= 0)
- */
-struct timespec timespec_add_safe(const struct timespec lhs,
-                                 const struct timespec rhs)
-{
-       struct timespec res;
-
-       set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec,
-                               lhs.tv_nsec + rhs.tv_nsec);
-
-       if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
-               res.tv_sec = TIME_T_MAX;
-
-       return res;
-}
index 57a413fd0ebf557b947e8fe8b38e3c180f9bfedd..e59ce8b1b5500703ee79f1b32c902205a1e92dda 100644 (file)
@@ -1,3 +1,4 @@
+obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 obj-y += timeconv.o posix-clock.o alarmtimer.o
 
@@ -12,3 +13,19 @@ obj-$(CONFIG_TICK_ONESHOT)                   += tick-oneshot.o
 obj-$(CONFIG_TICK_ONESHOT)                     += tick-sched.o
 obj-$(CONFIG_TIMER_STATS)                      += timer_stats.o
 obj-$(CONFIG_DEBUG_FS)                         += timekeeping_debug.o
+
+$(obj)/time.o: $(obj)/timeconst.h
+
+quiet_cmd_hzfile = HZFILE  $@
+      cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@
+
+targets += hz.bc
+$(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE
+       $(call if_changed,hzfile)
+
+quiet_cmd_bc  = BC      $@
+      cmd_bc  = bc -q $(filter-out FORCE,$^) > $@
+
+targets += timeconst.h
+$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
+       $(call if_changed,bc)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
new file mode 100644 (file)
index 0000000..3ab2899
--- /dev/null
@@ -0,0 +1,1915 @@
+/*
+ *  linux/kernel/time/hrtimer.c
+ *
+ *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
+ *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
+ *
+ *  High-resolution kernel timers
+ *
+ *  In contrast to the low-resolution timeout API implemented in
+ *  kernel/time/timer.c, hrtimers provide finer resolution and accuracy
+ *  depending on system configuration and capabilities.
+ *
+ *  These timers are currently used for:
+ *   - itimers
+ *   - POSIX timers
+ *   - nanosleep
+ *   - precise in-kernel timing
+ *
+ *  Started by: Thomas Gleixner and Ingo Molnar
+ *
+ *  Credits:
+ *     based on kernel/timer.c
+ *
+ *     Help, testing, suggestions, bugfixes, improvements were
+ *     provided by:
+ *
+ *     George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
+ *     et al.
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/cpu.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/hrtimer.h>
+#include <linux/notifier.h>
+#include <linux/syscalls.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/tick.h>
+#include <linux/seq_file.h>
+#include <linux/err.h>
+#include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
+#include <linux/sched/deadline.h>
+#include <linux/timer.h>
+#include <linux/freezer.h>
+
+#include <asm/uaccess.h>
+
+#include <trace/events/timer.h>
+
+/*
+ * The timer bases:
+ *
+ * There are more clockids than hrtimer bases. Thus, we index
+ * into the timer bases by the hrtimer_base_type enum. When trying
+ * to reach a base using a clockid, hrtimer_clockid_to_base()
+ * is used to convert from clockid to the proper hrtimer_base_type.
+ */
+DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
+{
+
+       .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
+       .clock_base =
+       {
+               {
+                       .index = HRTIMER_BASE_MONOTONIC,
+                       .clockid = CLOCK_MONOTONIC,
+                       .get_time = &ktime_get,
+                       .resolution = KTIME_LOW_RES,
+               },
+               {
+                       .index = HRTIMER_BASE_REALTIME,
+                       .clockid = CLOCK_REALTIME,
+                       .get_time = &ktime_get_real,
+                       .resolution = KTIME_LOW_RES,
+               },
+               {
+                       .index = HRTIMER_BASE_BOOTTIME,
+                       .clockid = CLOCK_BOOTTIME,
+                       .get_time = &ktime_get_boottime,
+                       .resolution = KTIME_LOW_RES,
+               },
+               {
+                       .index = HRTIMER_BASE_TAI,
+                       .clockid = CLOCK_TAI,
+                       .get_time = &ktime_get_clocktai,
+                       .resolution = KTIME_LOW_RES,
+               },
+       }
+};
+
+static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
+       [CLOCK_REALTIME]        = HRTIMER_BASE_REALTIME,
+       [CLOCK_MONOTONIC]       = HRTIMER_BASE_MONOTONIC,
+       [CLOCK_BOOTTIME]        = HRTIMER_BASE_BOOTTIME,
+       [CLOCK_TAI]             = HRTIMER_BASE_TAI,
+};
+
+static inline int hrtimer_clockid_to_base(clockid_t clock_id)
+{
+       return hrtimer_clock_to_base_table[clock_id];
+}
+
+
+/*
+ * Get the coarse grained time at the softirq based on xtime and
+ * wall_to_monotonic.
+ */
+static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
+{
+       ktime_t xtim, mono, boot;
+       struct timespec xts, tom, slp;
+       s32 tai_offset;
+
+       get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
+       tai_offset = timekeeping_get_tai_offset();
+
+       xtim = timespec_to_ktime(xts);
+       mono = ktime_add(xtim, timespec_to_ktime(tom));
+       boot = ktime_add(mono, timespec_to_ktime(slp));
+       base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
+       base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
+       base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
+       base->clock_base[HRTIMER_BASE_TAI].softirq_time =
+                               ktime_add(xtim, ktime_set(tai_offset, 0));
+}
+
+/*
+ * Functions and macros which are different for UP/SMP systems are kept in a
+ * single place
+ */
+#ifdef CONFIG_SMP
+
+/*
+ * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
+ * means that all timers which are tied to this base via timer->base are
+ * locked, and the base itself is locked too.
+ *
+ * So __run_timers/migrate_timers can safely modify all timers which could
+ * be found on the lists/queues.
+ *
+ * When the timer's base is locked, and the timer removed from list, it is
+ * possible to set timer->base = NULL and drop the lock: the timer remains
+ * locked.
+ */
+static
+struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
+                                            unsigned long *flags)
+{
+       struct hrtimer_clock_base *base;
+
+       for (;;) {
+               base = timer->base;
+               if (likely(base != NULL)) {
+                       raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
+                       if (likely(base == timer->base))
+                               return base;
+                       /* The timer has migrated to another CPU: */
+                       raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
+               }
+               cpu_relax();
+       }
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+       ktime_t expires;
+
+       if (!new_base->cpu_base->hres_active)
+               return 0;
+
+       expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+       return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+       return 0;
+#endif
+}
+
+/*
+ * Switch the timer base to the current CPU when possible.
+ */
+static inline struct hrtimer_clock_base *
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
+                   int pinned)
+{
+       struct hrtimer_clock_base *new_base;
+       struct hrtimer_cpu_base *new_cpu_base;
+       int this_cpu = smp_processor_id();
+       int cpu = get_nohz_timer_target(pinned);
+       int basenum = base->index;
+
+again:
+       new_cpu_base = &per_cpu(hrtimer_bases, cpu);
+       new_base = &new_cpu_base->clock_base[basenum];
+
+       if (base != new_base) {
+               /*
+                * We are trying to move timer to new_base.
+                * However we can't change timer's base while it is running,
+                * so we keep it on the same CPU. No hassle vs. reprogramming
+                * the event source in the high resolution case. The softirq
+                * code will take care of this when the timer function has
+                * completed. There is no conflict as we hold the lock until
+                * the timer is enqueued.
+                */
+               if (unlikely(hrtimer_callback_running(timer)))
+                       return base;
+
+               /* See the comment in lock_hrtimer_base() */
+               timer->base = NULL;
+               raw_spin_unlock(&base->cpu_base->lock);
+               raw_spin_lock(&new_base->cpu_base->lock);
+
+               if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+                       cpu = this_cpu;
+                       raw_spin_unlock(&new_base->cpu_base->lock);
+                       raw_spin_lock(&base->cpu_base->lock);
+                       timer->base = base;
+                       goto again;
+               }
+               timer->base = new_base;
+       } else {
+               if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+                       cpu = this_cpu;
+                       goto again;
+               }
+       }
+       return new_base;
+}
+
+#else /* CONFIG_SMP */
+
+static inline struct hrtimer_clock_base *
+lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
+{
+       struct hrtimer_clock_base *base = timer->base;
+
+       raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
+
+       return base;
+}
+
+# define switch_hrtimer_base(t, b, p)  (b)
+
+#endif /* !CONFIG_SMP */
+
+/*
+ * Functions for the union type storage format of ktime_t which are
+ * too large for inlining:
+ */
+#if BITS_PER_LONG < 64
+# ifndef CONFIG_KTIME_SCALAR
+/**
+ * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
+ * @kt:                addend
+ * @nsec:      the scalar nsec value to add
+ *
+ * Returns the sum of kt and nsec in ktime_t format
+ */
+ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
+{
+       ktime_t tmp;
+
+       if (likely(nsec < NSEC_PER_SEC)) {
+               tmp.tv64 = nsec;
+       } else {
+               unsigned long rem = do_div(nsec, NSEC_PER_SEC);
+
+               /* Make sure nsec fits into long */
+               if (unlikely(nsec > KTIME_SEC_MAX))
+                       return (ktime_t){ .tv64 = KTIME_MAX };
+
+               tmp = ktime_set((long)nsec, rem);
+       }
+
+       return ktime_add(kt, tmp);
+}
+
+EXPORT_SYMBOL_GPL(ktime_add_ns);
+
+/**
+ * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
+ * @kt:                minuend
+ * @nsec:      the scalar nsec value to subtract
+ *
+ * Returns the subtraction of @nsec from @kt in ktime_t format
+ */
+ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
+{
+       ktime_t tmp;
+
+       if (likely(nsec < NSEC_PER_SEC)) {
+               tmp.tv64 = nsec;
+       } else {
+               unsigned long rem = do_div(nsec, NSEC_PER_SEC);
+
+               tmp = ktime_set((long)nsec, rem);
+       }
+
+       return ktime_sub(kt, tmp);
+}
+
+EXPORT_SYMBOL_GPL(ktime_sub_ns);
+# endif /* !CONFIG_KTIME_SCALAR */
+
+/*
+ * Divide a ktime value by a nanosecond value
+ */
+u64 ktime_divns(const ktime_t kt, s64 div)
+{
+       u64 dclc;
+       int sft = 0;
+
+       dclc = ktime_to_ns(kt);
+       /* Make sure the divisor is less than 2^32: */
+       while (div >> 32) {
+               sft++;
+               div >>= 1;
+       }
+       dclc >>= sft;
+       do_div(dclc, (unsigned long) div);
+
+       return dclc;
+}
+#endif /* BITS_PER_LONG < 64 */
+
+/*
+ * Add two ktime values and do a safety check for overflow:
+ */
+ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
+{
+       ktime_t res = ktime_add(lhs, rhs);
+
+       /*
+        * We use KTIME_SEC_MAX here, the maximum timeout which we can
+        * return to user space in a timespec:
+        */
+       if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
+               res = ktime_set(KTIME_SEC_MAX, 0);
+
+       return res;
+}
+
+EXPORT_SYMBOL_GPL(ktime_add_safe);
+
+#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
+
+static struct debug_obj_descr hrtimer_debug_descr;
+
+static void *hrtimer_debug_hint(void *addr)
+{
+       return ((struct hrtimer *) addr)->function;
+}
+
+/*
+ * fixup_init is called when:
+ * - an active object is initialized
+ */
+static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
+{
+       struct hrtimer *timer = addr;
+
+       switch (state) {
+       case ODEBUG_STATE_ACTIVE:
+               hrtimer_cancel(timer);
+               debug_object_init(timer, &hrtimer_debug_descr);
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * fixup_activate is called when:
+ * - an active object is activated
+ * - an unknown object is activated (might be a statically initialized object)
+ */
+static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
+{
+       switch (state) {
+
+       case ODEBUG_STATE_NOTAVAILABLE:
+               WARN_ON_ONCE(1);
+               return 0;
+
+       case ODEBUG_STATE_ACTIVE:
+               WARN_ON(1);
+               /* fall through */
+       default:
+               return 0;
+       }
+}
+
+/*
+ * fixup_free is called when:
+ * - an active object is freed
+ */
+static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
+{
+       struct hrtimer *timer = addr;
+
+       switch (state) {
+       case ODEBUG_STATE_ACTIVE:
+               hrtimer_cancel(timer);
+               debug_object_free(timer, &hrtimer_debug_descr);
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+static struct debug_obj_descr hrtimer_debug_descr = {
+       .name           = "hrtimer",
+       .debug_hint     = hrtimer_debug_hint,
+       .fixup_init     = hrtimer_fixup_init,
+       .fixup_activate = hrtimer_fixup_activate,
+       .fixup_free     = hrtimer_fixup_free,
+};
+
+static inline void debug_hrtimer_init(struct hrtimer *timer)
+{
+       debug_object_init(timer, &hrtimer_debug_descr);
+}
+
+static inline void debug_hrtimer_activate(struct hrtimer *timer)
+{
+       debug_object_activate(timer, &hrtimer_debug_descr);
+}
+
+static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
+{
+       debug_object_deactivate(timer, &hrtimer_debug_descr);
+}
+
+static inline void debug_hrtimer_free(struct hrtimer *timer)
+{
+       debug_object_free(timer, &hrtimer_debug_descr);
+}
+
+static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
+                          enum hrtimer_mode mode);
+
+void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
+                          enum hrtimer_mode mode)
+{
+       debug_object_init_on_stack(timer, &hrtimer_debug_descr);
+       __hrtimer_init(timer, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
+
+void destroy_hrtimer_on_stack(struct hrtimer *timer)
+{
+       debug_object_free(timer, &hrtimer_debug_descr);
+}
+
+#else
+static inline void debug_hrtimer_init(struct hrtimer *timer) { }
+static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
+static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
+#endif
+
+static inline void
+debug_init(struct hrtimer *timer, clockid_t clockid,
+          enum hrtimer_mode mode)
+{
+       debug_hrtimer_init(timer);
+       trace_hrtimer_init(timer, clockid, mode);
+}
+
+static inline void debug_activate(struct hrtimer *timer)
+{
+       debug_hrtimer_activate(timer);
+       trace_hrtimer_start(timer);
+}
+
+static inline void debug_deactivate(struct hrtimer *timer)
+{
+       debug_hrtimer_deactivate(timer);
+       trace_hrtimer_cancel(timer);
+}
+
+/* High resolution timer related functions */
+#ifdef CONFIG_HIGH_RES_TIMERS
+
+/*
+ * High resolution timer enabled ?
+ */
+static int hrtimer_hres_enabled __read_mostly  = 1;
+
+/*
+ * Enable / Disable high resolution mode
+ */
+static int __init setup_hrtimer_hres(char *str)
+{
+       if (!strcmp(str, "off"))
+               hrtimer_hres_enabled = 0;
+       else if (!strcmp(str, "on"))
+               hrtimer_hres_enabled = 1;
+       else
+               return 0;
+       return 1;
+}
+
+__setup("highres=", setup_hrtimer_hres);
+
+/*
+ * hrtimer_is_hres_enabled - query, if the highres mode is enabled
+ */
+static inline int hrtimer_is_hres_enabled(void)
+{
+       return hrtimer_hres_enabled;
+}
+
+/*
+ * Is the high resolution mode active ?
+ */
+static inline int hrtimer_hres_active(void)
+{
+       return __this_cpu_read(hrtimer_bases.hres_active);
+}
+
+/*
+ * Reprogram the event source after checking all clock bases for the
+ * next event.
+ * Called with interrupts disabled and base->lock held
+ */
+static void
+hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
+{
+       int i;
+       struct hrtimer_clock_base *base = cpu_base->clock_base;
+       ktime_t expires, expires_next;
+
+       expires_next.tv64 = KTIME_MAX;
+
+       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
+               struct hrtimer *timer;
+               struct timerqueue_node *next;
+
+               next = timerqueue_getnext(&base->active);
+               if (!next)
+                       continue;
+               timer = container_of(next, struct hrtimer, node);
+
+               expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+               /*
+                * clock_was_set() has changed base->offset so the
+                * result might be negative. Fix it up to prevent a
+                * false positive in clockevents_program_event()
+                */
+               if (expires.tv64 < 0)
+                       expires.tv64 = 0;
+               if (expires.tv64 < expires_next.tv64)
+                       expires_next = expires;
+       }
+
+       if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
+               return;
+
+       cpu_base->expires_next.tv64 = expires_next.tv64;
+
+       /*
+        * If a hang was detected in the last timer interrupt then we
+        * leave the hang delay active in the hardware. We want the
+        * system to make progress. That also prevents the following
+        * scenario:
+        * T1 expires 50ms from now
+        * T2 expires 5s from now
+        *
+        * T1 is removed, so this code is called and would reprogram
+        * the hardware to 5s from now. Any hrtimer_start after that
+        * will not reprogram the hardware due to hang_detected being
+        * set. So we'd effectively block all timers until the T2 event
+        * fires.
+        */
+       if (cpu_base->hang_detected)
+               return;
+
+       if (cpu_base->expires_next.tv64 != KTIME_MAX)
+               tick_program_event(cpu_base->expires_next, 1);
+}
+
+/*
+ * Shared reprogramming for clock_realtime and clock_monotonic
+ *
+ * When a timer is enqueued and expires earlier than the already enqueued
+ * timers, we have to check, whether it expires earlier than the timer for
+ * which the clock event device was armed.
+ *
+ * Called with interrupts disabled and base->cpu_base->lock held
+ */
+static int hrtimer_reprogram(struct hrtimer *timer,
+                            struct hrtimer_clock_base *base)
+{
+       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+       ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+       int res;
+
+       WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
+
+       /*
+        * When the callback is running, we do not reprogram the clock event
+        * device. The timer callback is either running on a different CPU or
+        * the callback is executed in the hrtimer_interrupt context. The
+        * reprogramming is handled either by the softirq, which called the
+        * callback or at the end of the hrtimer_interrupt.
+        */
+       if (hrtimer_callback_running(timer))
+               return 0;
+
+       /*
+        * A CLOCK_REALTIME timer might be requested with an absolute
+        * expiry time which is less than base->offset. There is nothing
+        * wrong with that; just avoid calling into the tick code, which
+        * now objects to negative expiry values.
+        */
+       if (expires.tv64 < 0)
+               return -ETIME;
+
+       if (expires.tv64 >= cpu_base->expires_next.tv64)
+               return 0;
+
+       /*
+        * If a hang was detected in the last timer interrupt then we
+        * do not schedule a timer which is earlier than the expiry
+        * which we enforced in the hang detection. We want the system
+        * to make progress.
+        */
+       if (cpu_base->hang_detected)
+               return 0;
+
+       /*
+        * Clockevents returns -ETIME, when the event was in the past.
+        */
+       res = tick_program_event(expires, 0);
+       if (!IS_ERR_VALUE(res))
+               cpu_base->expires_next = expires;
+       return res;
+}
+
+/*
+ * Initialize the high resolution related parts of cpu_base
+ */
+static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
+{
+       base->expires_next.tv64 = KTIME_MAX;
+       base->hres_active = 0;
+}
+
+/*
+ * When High resolution timers are active, try to reprogram. Note, that in case
+ * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
+ * check happens. The timer gets enqueued into the rbtree. The reprogramming
+ * and expiry check is done in the hrtimer_interrupt or in the softirq.
+ */
+static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+                                           struct hrtimer_clock_base *base)
+{
+       return base->cpu_base->hres_active && hrtimer_reprogram(timer, base);
+}
+
+static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
+{
+       ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
+       ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+       ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
+
+       return ktime_get_update_offsets(offs_real, offs_boot, offs_tai);
+}
+
+/*
+ * Retrigger next event is called after clock was set
+ *
+ * Called with interrupts disabled via on_each_cpu()
+ */
+static void retrigger_next_event(void *arg)
+{
+       struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
+
+       if (!hrtimer_hres_active())
+               return;
+
+       raw_spin_lock(&base->lock);
+       hrtimer_update_base(base);
+       hrtimer_force_reprogram(base, 0);
+       raw_spin_unlock(&base->lock);
+}
+
+/*
+ * Switch to high resolution mode
+ */
+static int hrtimer_switch_to_hres(void)
+{
+       int i, cpu = smp_processor_id();
+       struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
+       unsigned long flags;
+
+       if (base->hres_active)
+               return 1;
+
+       local_irq_save(flags);
+
+       if (tick_init_highres()) {
+               local_irq_restore(flags);
+               printk(KERN_WARNING "Could not switch to high resolution "
+                                   "mode on CPU %d\n", cpu);
+               return 0;
+       }
+       base->hres_active = 1;
+       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
+               base->clock_base[i].resolution = KTIME_HIGH_RES;
+
+       tick_setup_sched_timer();
+       /* "Retrigger" the interrupt to get things going */
+       retrigger_next_event(NULL);
+       local_irq_restore(flags);
+       return 1;
+}
+
+static void clock_was_set_work(struct work_struct *work)
+{
+       clock_was_set();
+}
+
+static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+
+/*
+ * Called from timekeeping and resume code to reprogram the hrtimer
+ * interrupt device on all cpus.
+ */
+void clock_was_set_delayed(void)
+{
+       schedule_work(&hrtimer_work);
+}
+
+#else
+
+static inline int hrtimer_hres_active(void) { return 0; }
+static inline int hrtimer_is_hres_enabled(void) { return 0; }
+static inline int hrtimer_switch_to_hres(void) { return 0; }
+static inline void
+hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
+static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+                                           struct hrtimer_clock_base *base)
+{
+       return 0;
+}
+static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
+static inline void retrigger_next_event(void *arg) { }
+
+#endif /* CONFIG_HIGH_RES_TIMERS */
+
+/*
+ * Clock realtime was set
+ *
+ * Change the offset of the realtime clock vs. the monotonic
+ * clock.
+ *
+ * We might have to reprogram the high resolution timer interrupt. On
+ * SMP we call the architecture specific code to retrigger _all_ high
+ * resolution timer interrupts. On UP we just disable interrupts and
+ * call the high resolution interrupt code.
+ */
+void clock_was_set(void)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+       /* Retrigger the CPU local events everywhere */
+       on_each_cpu(retrigger_next_event, NULL, 1);
+#endif
+       timerfd_clock_was_set();
+}
+
+/*
+ * During resume we might have to reprogram the high resolution timer
+ * interrupt on all online CPUs.  However, all other CPUs will be
+ * stopped with interrupts disabled, so the clock_was_set() call
+ * must be deferred.
+ */
+void hrtimers_resume(void)
+{
+       WARN_ONCE(!irqs_disabled(),
+                 KERN_INFO "hrtimers_resume() called with IRQs enabled!");
+
+       /* Retrigger on the local CPU */
+       retrigger_next_event(NULL);
+       /* And schedule a retrigger for all others */
+       clock_was_set_delayed();
+}
+
+static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+       if (timer->start_site)
+               return;
+       timer->start_site = __builtin_return_address(0);
+       memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+       timer->start_pid = current->pid;
+#endif
+}
+
+static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+       timer->start_site = NULL;
+#endif
+}
+
+static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+       if (likely(!timer_stats_active))
+               return;
+       timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+                                timer->function, timer->start_comm, 0);
+#endif
+}
+
+/*
+ * Counterpart to lock_hrtimer_base above:
+ */
+static inline
+void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
+{
+       raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
+}
+
+/**
+ * hrtimer_forward - forward the timer expiry
+ * @timer:     hrtimer to forward
+ * @now:       forward past this time
+ * @interval:  the interval to forward
+ *
+ * Forward the timer expiry so it will expire in the future.
+ * Returns the number of overruns.
+ */
+u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
+{
+       u64 orun = 1;
+       ktime_t delta;
+
+       delta = ktime_sub(now, hrtimer_get_expires(timer));
+
+       if (delta.tv64 < 0)
+               return 0;
+
+       if (interval.tv64 < timer->base->resolution.tv64)
+               interval.tv64 = timer->base->resolution.tv64;
+
+       if (unlikely(delta.tv64 >= interval.tv64)) {
+               s64 incr = ktime_to_ns(interval);
+
+               orun = ktime_divns(delta, incr);
+               hrtimer_add_expires_ns(timer, incr * orun);
+               if (hrtimer_get_expires_tv64(timer) > now.tv64)
+                       return orun;
+               /*
+                * This (and the ktime_add() below) is the
+                * correction for exact:
+                */
+               orun++;
+       }
+       hrtimer_add_expires(timer, interval);
+
+       return orun;
+}
+EXPORT_SYMBOL_GPL(hrtimer_forward);
+
+/*
+ * enqueue_hrtimer - internal function to (re)start a timer
+ *
+ * The timer is inserted in expiry order. Insertion into the
+ * red black tree is O(log(n)). Must hold the base lock.
+ *
+ * Returns 1 when the new timer is the leftmost timer in the tree.
+ */
+static int enqueue_hrtimer(struct hrtimer *timer,
+                          struct hrtimer_clock_base *base)
+{
+       debug_activate(timer);
+
+       timerqueue_add(&base->active, &timer->node);
+       base->cpu_base->active_bases |= 1 << base->index;
+
+       /*
+        * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
+        * state of a possibly running callback.
+        */
+       timer->state |= HRTIMER_STATE_ENQUEUED;
+
+       return (&timer->node == base->active.next);
+}
+
+/*
+ * __remove_hrtimer - internal function to remove a timer
+ *
+ * Caller must hold the base lock.
+ *
+ * High resolution timer mode reprograms the clock event device when the
+ * timer is the one which expires next. The caller can disable this by setting
+ * reprogram to zero. This is useful, when the context does a reprogramming
+ * anyway (e.g. timer interrupt)
+ */
+static void __remove_hrtimer(struct hrtimer *timer,
+                            struct hrtimer_clock_base *base,
+                            unsigned long newstate, int reprogram)
+{
+       struct timerqueue_node *next_timer;
+       if (!(timer->state & HRTIMER_STATE_ENQUEUED))
+               goto out;
+
+       next_timer = timerqueue_getnext(&base->active);
+       timerqueue_del(&base->active, &timer->node);
+       if (&timer->node == next_timer) {
+#ifdef CONFIG_HIGH_RES_TIMERS
+               /* Reprogram the clock event device, if enabled */
+               if (reprogram && hrtimer_hres_active()) {
+                       ktime_t expires;
+
+                       expires = ktime_sub(hrtimer_get_expires(timer),
+                                           base->offset);
+                       if (base->cpu_base->expires_next.tv64 == expires.tv64)
+                               hrtimer_force_reprogram(base->cpu_base, 1);
+               }
+#endif
+       }
+       if (!timerqueue_getnext(&base->active))
+               base->cpu_base->active_bases &= ~(1 << base->index);
+out:
+       timer->state = newstate;
+}
+
+/*
+ * remove hrtimer, called with base lock held
+ */
+static inline int
+remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
+{
+       if (hrtimer_is_queued(timer)) {
+               unsigned long state;
+               int reprogram;
+
+               /*
+                * Remove the timer and force reprogramming when high
+                * resolution mode is active and the timer is on the current
+                * CPU. If we remove a timer on another CPU, reprogramming is
+                * skipped. The interrupt event on this CPU is fired and
+                * reprogramming happens in the interrupt handler. This is a
+                * rare case and less expensive than a smp call.
+                */
+               debug_deactivate(timer);
+               timer_stats_hrtimer_clear_start_info(timer);
+               reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
+               /*
+                * We must preserve the CALLBACK state flag here,
+                * otherwise we could move the timer base in
+                * switch_hrtimer_base.
+                */
+               state = timer->state & HRTIMER_STATE_CALLBACK;
+               __remove_hrtimer(timer, base, state, reprogram);
+               return 1;
+       }
+       return 0;
+}
+
+int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+               unsigned long delta_ns, const enum hrtimer_mode mode,
+               int wakeup)
+{
+       struct hrtimer_clock_base *base, *new_base;
+       unsigned long flags;
+       int ret, leftmost;
+
+       base = lock_hrtimer_base(timer, &flags);
+
+       /*
+        * Remove an active timer from the queue. The result of
+        * remove_hrtimer() is handed back to the caller as the return
+        * value of this function.
+        */
+       ret = remove_hrtimer(timer, base);
+
+       if (mode & HRTIMER_MODE_REL) {  /* relative: add the base's time */
+               tim = ktime_add_safe(tim, base->get_time());
+               /*
+                * CONFIG_TIME_LOW_RES is a temporary way for architectures
+                * to signal that they simply return xtime in
+                * do_gettimeoffset(). In this case we want to round up by
+                * resolution when starting a relative timer, to avoid short
+                * timeouts. This will go away with the GTOD framework.
+                */
+#ifdef CONFIG_TIME_LOW_RES
+               tim = ktime_add_safe(tim, base->resolution);
+#endif
+       }
+
+       hrtimer_set_expires_range_ns(timer, tim, delta_ns);
+
+       /* Switch the timer base, if necessary: */
+       new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
+
+       timer_stats_hrtimer_set_start_info(timer);
+
+       leftmost = enqueue_hrtimer(timer, new_base);
+
+       /*
+        * Only allow reprogramming if the new base is on this CPU.
+        * (it might still be on another CPU if the timer was pending)
+        *
+        * If enqueue_hrtimer() reported the timer as leftmost, the base
+        * is local and hrtimer_enqueue_reprogram() returns nonzero, then
+        * HRTIMER_SOFTIRQ is raised.
+        *
+        * XXX send_remote_softirq() ?
+        */
+       if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
+               && hrtimer_enqueue_reprogram(timer, new_base)) {
+               if (wakeup) {
+                       /*
+                        * We need to drop cpu_base->lock to avoid a
+                        * lock ordering issue vs. rq->lock.
+                        *
+                        * The lock is released by hand here, so this
+                        * path restores the interrupt flags itself and
+                        * returns without calling unlock_hrtimer_base().
+                        */
+                       raw_spin_unlock(&new_base->cpu_base->lock);
+                       raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+                       local_irq_restore(flags);
+                       return ret;
+               } else {
+                       __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+               }
+       }
+
+       unlock_hrtimer_base(timer, &flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
+
+/**
+ * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
+ * @timer:     the timer to be added
+ * @tim:       expiry time
+ * @delta_ns:  "slack" range for the timer
+ * @mode:      expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *             relative (HRTIMER_MODE_REL)
+ *
+ * Calls __hrtimer_start_range_ns() with the wakeup argument set to 1.
+ *
+ * Returns:
+ *  0 on success
+ *  1 when the timer was active
+ */
+int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+               unsigned long delta_ns, const enum hrtimer_mode mode)
+{
+       return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
+}
+EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
+
+/**
+ * hrtimer_start - (re)start an hrtimer on the current CPU
+ * @timer:     the timer to be added
+ * @tim:       expiry time
+ * @mode:      expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *             relative (HRTIMER_MODE_REL)
+ *
+ * Same as hrtimer_start_range_ns() with a slack range of zero: calls
+ * __hrtimer_start_range_ns() with delta_ns 0 and wakeup 1.
+ *
+ * Returns:
+ *  0 on success
+ *  1 when the timer was active
+ */
+int
+hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
+{
+       return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
+}
+EXPORT_SYMBOL_GPL(hrtimer_start);
+
+
+/**
+ * hrtimer_try_to_cancel - try to deactivate a timer
+ * @timer:     hrtimer to stop
+ *
+ * Returns:
+ *  0 when the timer was not active
+ *  1 when the timer was active
+ * -1 when the timer is currently executing the callback function and
+ *    cannot be stopped
+ */
+int hrtimer_try_to_cancel(struct hrtimer *timer)
+{
+       struct hrtimer_clock_base *base;
+       unsigned long flags;
+       int ret = -1;
+
+       base = lock_hrtimer_base(timer, &flags);
+
+       if (!hrtimer_callback_running(timer))
+               ret = remove_hrtimer(timer, base);
+
+       unlock_hrtimer_base(timer, &flags);
+
+       return ret;
+
+}
+EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
+
+/**
+ * hrtimer_cancel - cancel a timer and wait for the handler to finish.
+ * @timer:     the timer to be cancelled
+ *
+ * Calls hrtimer_try_to_cancel() in a loop, with cpu_relax() between the
+ * attempts, for as long as it returns a negative value.
+ *
+ * Returns:
+ *  0 when the timer was not active
+ *  1 when the timer was active
+ */
+int hrtimer_cancel(struct hrtimer *timer)
+{
+       for (;;) {
+               int ret = hrtimer_try_to_cancel(timer);
+
+               if (ret >= 0)
+                       return ret;
+               cpu_relax();
+       }
+}
+EXPORT_SYMBOL_GPL(hrtimer_cancel);
+
+/**
+ * hrtimer_get_remaining - get remaining time for the timer
+ * @timer:     the timer to read
+ *
+ * Returns the result of hrtimer_expires_remaining(), read between
+ * lock_hrtimer_base() and unlock_hrtimer_base().
+ */
+ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+{
+       unsigned long flags;
+       ktime_t rem;
+
+       lock_hrtimer_base(timer, &flags);
+       rem = hrtimer_expires_remaining(timer);
+       unlock_hrtimer_base(timer, &flags);
+
+       return rem;
+}
+EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
+
+#ifdef CONFIG_NO_HZ_COMMON
+/**
+ * hrtimer_get_next_event - get the time until next expiry event
+ *
+ * Returns the delta to the next expiry event on this CPU, or zero if
+ * that event is already in the past. Returns KTIME_MAX if no timer is
+ * pending, and also when hrtimer_hres_active() is true, because the
+ * queues are not scanned in that case.
+ */
+ktime_t hrtimer_get_next_event(void)
+{
+       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+       struct hrtimer_clock_base *base = cpu_base->clock_base;
+       ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
+       unsigned long flags;
+       int i;
+
+       raw_spin_lock_irqsave(&cpu_base->lock, flags);
+
+       if (!hrtimer_hres_active()) {
+               for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
+                       struct hrtimer *timer;
+                       struct timerqueue_node *next;
+
+                       next = timerqueue_getnext(&base->active);
+                       if (!next)
+                               continue;
+
+                       timer = container_of(next, struct hrtimer, node);
+                       delta.tv64 = hrtimer_get_expires_tv64(timer);
+                       delta = ktime_sub(delta, base->get_time());
+                       if (delta.tv64 < mindelta.tv64)
+                               mindelta.tv64 = delta.tv64;
+               }
+       }
+
+       raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+
+       if (mindelta.tv64 < 0)
+               mindelta.tv64 = 0;      /* never report a negative delta */
+       return mindelta;
+}
+#endif
+
+static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
+                          enum hrtimer_mode mode)
+{
+       struct hrtimer_cpu_base *cpu_base;
+       int base;
+
+       memset(timer, 0, sizeof(struct hrtimer));       /* start all-zero */
+
+       cpu_base = &__raw_get_cpu_var(hrtimer_bases);
+
+       if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
+               clock_id = CLOCK_MONOTONIC;     /* non-ABS REALTIME -> MONOTONIC */
+
+       base = hrtimer_clockid_to_base(clock_id);
+       timer->base = &cpu_base->clock_base[base];      /* from per-cpu hrtimer_bases */
+       timerqueue_init(&timer->node);
+
+#ifdef CONFIG_TIMER_STATS
+       timer->start_site = NULL;
+       timer->start_pid = -1;
+       memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
+}
+
+/**
+ * hrtimer_init - initialize a timer to the given clock
+ * @timer:     the timer to be initialized
+ * @clock_id:  the clock to be used
+ * @mode:      timer mode: absolute (HRTIMER_MODE_ABS) or
+ *             relative (HRTIMER_MODE_REL)
+ *
+ * Calls debug_init() and then __hrtimer_init(), which zeroes @timer
+ * and selects its clock base. A CLOCK_REALTIME timer whose @mode is
+ * not HRTIMER_MODE_ABS is placed on the CLOCK_MONOTONIC base.
+ */
+void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
+                 enum hrtimer_mode mode)
+{
+       debug_init(timer, clock_id, mode);
+       __hrtimer_init(timer, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init);
+
+/**
+ * hrtimer_get_res - get the timer resolution for a clock
+ * @which_clock: which clock to query
+ * @tp:                 pointer to timespec variable to store the resolution
+ *
+ * Store the resolution of the clock selected by @which_clock in the
+ * variable pointed to by @tp. The value is taken from the matching
+ * clock base in the per-cpu hrtimer_bases.
+ *
+ * Always returns 0.
+ */
+int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
+{
+       struct hrtimer_cpu_base *cpu_base;
+       int base = hrtimer_clockid_to_base(which_clock);
+
+       cpu_base = &__raw_get_cpu_var(hrtimer_bases);
+       *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(hrtimer_get_res);
+
+static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
+{
+       struct hrtimer_clock_base *base = timer->base;
+       struct hrtimer_cpu_base *cpu_base = base->cpu_base;
+       enum hrtimer_restart (*fn)(struct hrtimer *);
+       int restart;
+
+       WARN_ON(!irqs_disabled());
+
+       debug_deactivate(timer);
+       __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+       timer_stats_account_hrtimer(timer);
+       fn = timer->function;
+
+       /*
+        * Because we run timers from hardirq context, there is no chance
+        * they get migrated to another cpu, therefore it's safe to unlock
+        * the timer base.
+        *
+        * The callback runs with cpu_base->lock dropped; the lock is
+        * taken again right after it returns.
+        */
+       raw_spin_unlock(&cpu_base->lock);
+       trace_hrtimer_expire_entry(timer, now);
+       restart = fn(timer);
+       trace_hrtimer_expire_exit(timer);
+       raw_spin_lock(&cpu_base->lock);
+
+       /*
+        * Note: We clear the CALLBACK bit after enqueue_hrtimer and
+        * we do not reprogram the event hardware here. That happens
+        * either in hrtimer_start_range_ns() or in hrtimer_interrupt().
+        *
+        * Any callback return value other than HRTIMER_NORESTART puts
+        * the timer back on the same base.
+        */
+       if (restart != HRTIMER_NORESTART) {
+               BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
+               enqueue_hrtimer(timer, base);
+       }
+
+       WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
+
+       timer->state &= ~HRTIMER_STATE_CALLBACK;
+}
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+
+/*
+ * High resolution timer interrupt
+ * Called with interrupts disabled
+ *
+ * Runs the expired timers of all active clock bases and programs the
+ * event device for the next expiry. If programming fails because the
+ * next timer is already expired, the queues are scanned again; after
+ * the third failed attempt a hang is recorded and the next event is
+ * pushed out by the time spent in here, limited to 100ms.
+ */
+void hrtimer_interrupt(struct clock_event_device *dev)
+{
+       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+       ktime_t expires_next, now, entry_time, delta;
+       int i, retries = 0;
+
+       BUG_ON(!cpu_base->hres_active);
+       cpu_base->nr_events++;
+       dev->next_event.tv64 = KTIME_MAX;
+
+       raw_spin_lock(&cpu_base->lock);
+       entry_time = now = hrtimer_update_base(cpu_base);
+retry:
+       expires_next.tv64 = KTIME_MAX;
+       /*
+        * We set expires_next to KTIME_MAX here with cpu_base->lock
+        * held to prevent that a timer is enqueued in our queue via
+        * the migration code. This does not affect enqueueing of
+        * timers which run their callback and need to be requeued on
+        * this CPU.
+        */
+       cpu_base->expires_next.tv64 = KTIME_MAX;
+
+       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+               struct hrtimer_clock_base *base;
+               struct timerqueue_node *node;
+               ktime_t basenow;
+
+               if (!(cpu_base->active_bases & (1 << i)))
+                       continue;
+
+               base = cpu_base->clock_base + i;
+               basenow = ktime_add(now, base->offset);
+
+               while ((node = timerqueue_getnext(&base->active))) {
+                       struct hrtimer *timer;
+
+                       timer = container_of(node, struct hrtimer, node);
+
+                       /*
+                        * The immediate goal for using the softexpires is
+                        * minimizing wakeups, not running timers at the
+                        * earliest interrupt after their soft expiration.
+                        * This allows us to avoid using a Priority Search
+                        * Tree, which can answer a stabbing query for
+                        * overlapping intervals and instead use the simple
+                        * BST we already have.
+                        * We don't add extra wakeups by delaying timers that
+                        * are right-of a not yet expired timer, because that
+                        * timer will have to trigger a wakeup anyway.
+                        */
+
+                       if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
+                               ktime_t expires;
+
+                               expires = ktime_sub(hrtimer_get_expires(timer),
+                                                   base->offset);
+                               if (expires.tv64 < 0)
+                                       expires.tv64 = KTIME_MAX;
+                               if (expires.tv64 < expires_next.tv64)
+                                       expires_next = expires;
+                               break;
+                       }
+
+                       __run_hrtimer(timer, &basenow);
+               }
+       }
+
+       /*
+        * Store the new expiry value so the migration code can verify
+        * against it.
+        */
+       cpu_base->expires_next = expires_next;
+       raw_spin_unlock(&cpu_base->lock);
+
+       /* Reprogramming necessary ? */
+       if (expires_next.tv64 == KTIME_MAX ||
+           !tick_program_event(expires_next, 0)) {
+               cpu_base->hang_detected = 0;
+               return;
+       }
+
+       /*
+        * The next timer was already expired due to:
+        * - tracing
+        * - long lasting callbacks
+        * - being scheduled away when running in a VM
+        *
+        * We need to prevent that we loop forever in the hrtimer
+        * interrupt routine. We give it 3 attempts to avoid
+        * overreacting on some spurious event.
+        *
+        * Acquire base lock for updating the offsets and retrieving
+        * the current time.
+        */
+       raw_spin_lock(&cpu_base->lock);
+       now = hrtimer_update_base(cpu_base);
+       cpu_base->nr_retries++;
+       if (++retries < 3)
+               goto retry;
+       /*
+        * Give the system a chance to do something else than looping
+        * here. We stored the entry time, so we know exactly how long
+        * we spent here. We schedule the next event this amount of
+        * time away.
+        */
+       cpu_base->nr_hangs++;
+       cpu_base->hang_detected = 1;
+       raw_spin_unlock(&cpu_base->lock);
+       delta = ktime_sub(now, entry_time);
+       if (delta.tv64 > cpu_base->max_hang_time.tv64)
+               cpu_base->max_hang_time = delta;
+       /*
+        * Limit it to a sensible value as we enforce a longer
+        * delay. The next event is programmed at most 100ms ahead.
+        */
+       if (delta.tv64 > 100 * NSEC_PER_MSEC)
+               expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+       else
+               expires_next = ktime_add(now, delta);
+       tick_program_event(expires_next, 1);
+       printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+                   ktime_to_ns(delta));
+}
+
+/*
+ * local version of hrtimer_peek_ahead_timers() called with interrupts
+ * disabled.
+ *
+ * Does nothing unless hrtimer_hres_active(); otherwise runs
+ * hrtimer_interrupt() on the event device of this CPU's tick device,
+ * provided one is set.
+ */
+static void __hrtimer_peek_ahead_timers(void)
+{
+       struct tick_device *td;
+
+       if (!hrtimer_hres_active())
+               return;
+
+       td = &__get_cpu_var(tick_cpu_device);
+       if (td && td->evtdev)
+               hrtimer_interrupt(td->evtdev);
+}
+
+/**
+ * hrtimer_peek_ahead_timers -- run soft-expired timers now
+ *
+ * hrtimer_peek_ahead_timers will peek at the timer queue of
+ * the current cpu and check if there are any timers for which
+ * the soft expires time has passed. If any such timers exist,
+ * they are run immediately and then removed from the timer queue.
+ *
+ * Interrupts are disabled around the call to
+ * __hrtimer_peek_ahead_timers() and restored afterwards.
+ */
+void hrtimer_peek_ahead_timers(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __hrtimer_peek_ahead_timers();
+       local_irq_restore(flags);
+}
+
+static void run_hrtimer_softirq(struct softirq_action *h)
+{
+       hrtimer_peek_ahead_timers();    /* HRTIMER_SOFTIRQ handler; @h is unused */
+}
+
+#else /* CONFIG_HIGH_RES_TIMERS */
+
+static inline void __hrtimer_peek_ahead_timers(void) { }       /* no-op stub for !CONFIG_HIGH_RES_TIMERS */
+
+#endif /* !CONFIG_HIGH_RES_TIMERS */
+
+/*
+ * Called from timer softirq every jiffy, expire hrtimers:
+ *
+ * For HRT it's the fallback code to run the softirq in the timer
+ * softirq context in case the hrtimer initialization failed or has
+ * not been done yet.
+ */
+void hrtimer_run_pending(void)
+{
+       if (hrtimer_hres_active())
+               return;
+
+       /*
+        * This _is_ ugly: We have to check in the softirq context,
+        * whether we can switch to highres and / or nohz mode. The
+        * clocksource switch happens in the timer interrupt with
+        * xtime_lock held. Notification from there only sets the
+        * check bit in the tick_oneshot code, otherwise we might
+        * deadlock vs. xtime_lock.
+        */
+       if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
+               hrtimer_switch_to_hres();
+}
+
+/*
+ * Called from hardirq context every jiffy
+ *
+ * Returns right away when hrtimer_hres_active(). Otherwise each clock
+ * base with queued timers is processed under cpu_base->lock: timers
+ * whose expiry time lies before base->softirq_time are run via
+ * __run_hrtimer(). hrtimer_get_softirq_time() is called once, before
+ * the first non-empty base is handled.
+ */
+void hrtimer_run_queues(void)
+{
+       struct timerqueue_node *node;
+       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+       struct hrtimer_clock_base *base;
+       int index, gettime = 1;
+
+       if (hrtimer_hres_active())
+               return;
+
+       for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
+               base = &cpu_base->clock_base[index];
+               if (!timerqueue_getnext(&base->active))
+                       continue;
+
+               if (gettime) {
+                       hrtimer_get_softirq_time(cpu_base);
+                       gettime = 0;
+               }
+
+               raw_spin_lock(&cpu_base->lock);
+
+               while ((node = timerqueue_getnext(&base->active))) {
+                       struct hrtimer *timer;
+
+                       timer = container_of(node, struct hrtimer, node);
+                       if (base->softirq_time.tv64 <=
+                                       hrtimer_get_expires_tv64(timer))
+                               break;  /* first timer is not expired yet */
+
+                       __run_hrtimer(timer, &base->softirq_time);
+               }
+               raw_spin_unlock(&cpu_base->lock);
+       }
+}
+
+/*
+ * Sleep related functions:
+ */
+static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
+{
+       struct hrtimer_sleeper *t =
+               container_of(timer, struct hrtimer_sleeper, timer);
+       struct task_struct *task = t->task;
+
+       t->task = NULL;         /* a NULL task tells the sleeper the timer fired */
+       if (task)
+               wake_up_process(task);
+
+       return HRTIMER_NORESTART;       /* the timer is not requeued */
+}
+
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+{
+       sl->timer.function = hrtimer_wakeup;    /* expiry callback */
+       sl->task = task;                        /* task the callback wakes up */
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
+
+static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
+{
+       hrtimer_init_sleeper(t, current);
+
+       do {
+               set_current_state(TASK_INTERRUPTIBLE);
+               hrtimer_start_expires(&t->timer, mode);
+               if (!hrtimer_active(&t->timer))
+                       t->task = NULL; /* timer is not active: do not sleep */
+
+               if (likely(t->task))
+                       freezable_schedule();
+
+               hrtimer_cancel(&t->timer);
+               mode = HRTIMER_MODE_ABS;        /* further iterations restart as ABS */
+
+       } while (t->task && !signal_pending(current));
+
+       __set_current_state(TASK_RUNNING);
+
+       return t->task == NULL; /* nonzero: the sleeper's task was cleared */
+}
+
+static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
+{
+       struct timespec rmt;
+       ktime_t rem;
+
+       rem = hrtimer_expires_remaining(timer);
+       if (rem.tv64 <= 0)
+               return 0;       /* nothing left; *rmtp is not written */
+       rmt = ktime_to_timespec(rem);
+
+       if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
+               return -EFAULT;
+
+       return 1;       /* remaining time was copied to *rmtp */
+}
+
+long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
+{
+       struct hrtimer_sleeper t;
+       struct timespec __user  *rmtp;
+       int ret = 0;
+
+       hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
+                               HRTIMER_MODE_ABS);
+       hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
+
+       if (do_nanosleep(&t, HRTIMER_MODE_ABS))
+               goto out;       /* sleep completed: return 0 */
+
+       rmtp = restart->nanosleep.rmtp;
+       if (rmtp) {
+               ret = update_rmtp(&t.timer, rmtp);
+               if (ret <= 0)
+                       goto out;       /* no time left (0) or -EFAULT */
+       }
+
+       /*
+        * The other values in restart are already filled in
+        * (e.g. by hrtimer_nanosleep()).
+        */
+       ret = -ERESTART_RESTARTBLOCK;
+out:
+       destroy_hrtimer_on_stack(&t.timer);
+       return ret;
+}
+
+long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
+                      const enum hrtimer_mode mode, const clockid_t clockid)
+{
+       struct restart_block *restart;
+       struct hrtimer_sleeper t;
+       int ret = 0;
+       unsigned long slack;
+
+       slack = current->timer_slack_ns;
+       if (dl_task(current) || rt_task(current))
+               slack = 0;      /* dl/rt tasks sleep without slack */
+
+       hrtimer_init_on_stack(&t.timer, clockid, mode);
+       hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
+       if (do_nanosleep(&t, mode))
+               goto out;       /* sleep completed: return 0 */
+
+       /*
+        * Absolute timers do not update the rmtp value and restart
+        * with -ERESTARTNOHAND instead of using the restart block:
+        */
+       if (mode == HRTIMER_MODE_ABS) {
+               ret = -ERESTARTNOHAND;
+               goto out;
+       }
+
+       if (rmtp) {
+               ret = update_rmtp(&t.timer, rmtp);
+               if (ret <= 0)
+                       goto out;       /* no time left (0) or -EFAULT */
+       }
+
+       restart = &current_thread_info()->restart_block;
+       restart->fn = hrtimer_nanosleep_restart;        /* continues the sleep */
+       restart->nanosleep.clockid = t.timer.base->clockid;
+       restart->nanosleep.rmtp = rmtp;
+       restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
+
+       ret = -ERESTART_RESTARTBLOCK;
+out:
+       destroy_hrtimer_on_stack(&t.timer);
+       return ret;
+}
+
+SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
+               struct timespec __user *, rmtp)
+{
+       struct timespec tu;
+
+       if (copy_from_user(&tu, rqtp, sizeof(tu)))
+               return -EFAULT;         /* @rqtp could not be copied in */
+
+       if (!timespec_valid(&tu))
+               return -EINVAL;         /* rejected by timespec_valid() */
+
+       return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+}
+
+/*
+ * Functions related to boot-time initialization:
+ */
+static void init_hrtimers_cpu(int cpu)
+{
+       struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
+       int i;
+
+       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+               cpu_base->clock_base[i].cpu_base = cpu_base;    /* back pointer */
+               timerqueue_init_head(&cpu_base->clock_base[i].active);
+       }
+
+       hrtimer_init_hres(cpu_base);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+                               struct hrtimer_clock_base *new_base)
+{
+       struct hrtimer *timer;
+       struct timerqueue_node *node;
+
+       while ((node = timerqueue_getnext(&old_base->active))) {
+               timer = container_of(node, struct hrtimer, node);
+               BUG_ON(hrtimer_callback_running(timer));
+               debug_deactivate(timer);
+
+               /*
+                * Mark it as STATE_MIGRATE not INACTIVE otherwise the
+                * timer could be seen as !active and just vanish away
+                * under us on another CPU
+                */
+               __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
+               timer->base = new_base; /* re-home before enqueueing */
+               /*
+                * Enqueue the timers on the new cpu. This does not
+                * reprogram the event device in case the timer
+                * expires before the earliest on this CPU, but we run
+                * hrtimer_interrupt after we migrated everything to
+                * sort out already expired timers and reprogram the
+                * event device.
+                */
+               enqueue_hrtimer(timer, new_base);
+
+               /* Clear the migration state bit */
+               timer->state &= ~HRTIMER_STATE_MIGRATE;
+       }
+}
+
+static void migrate_hrtimers(int scpu)
+{
+       struct hrtimer_cpu_base *old_base, *new_base;
+       int i;
+
+       BUG_ON(cpu_online(scpu));       /* @scpu must not be online any more */
+       tick_cancel_sched_timer(scpu);
+
+       local_irq_disable();
+       old_base = &per_cpu(hrtimer_bases, scpu);
+       new_base = &__get_cpu_var(hrtimer_bases);
+       /*
+        * The caller is globally serialized and nobody else
+        * takes two locks at once, deadlock is not possible.
+        */
+       raw_spin_lock(&new_base->lock);
+       raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+
+       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+               migrate_hrtimer_list(&old_base->clock_base[i],
+                                    &new_base->clock_base[i]);
+       }
+
+       raw_spin_unlock(&old_base->lock);
+       raw_spin_unlock(&new_base->lock);
+
+       /* Check, if we got expired work to do */
+       __hrtimer_peek_ahead_timers();
+       local_irq_enable();
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int hrtimer_cpu_notify(struct notifier_block *self,
+                                       unsigned long action, void *hcpu)
+{
+       int scpu = (long)hcpu;  /* the CPU number is passed as the pointer value */
+
+       switch (action) {
+
+       case CPU_UP_PREPARE:
+       case CPU_UP_PREPARE_FROZEN:
+               init_hrtimers_cpu(scpu);
+               break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+       case CPU_DYING:
+       case CPU_DYING_FROZEN:
+               clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
+               break;
+       case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
+       {
+               clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
+               migrate_hrtimers(scpu); /* move its timers to this CPU */
+               break;
+       }
+#endif
+
+       default:
+               break;
+       }
+
+       return NOTIFY_OK;       /* returned for every action */
+}
+
+static struct notifier_block hrtimers_nb = {
+       .notifier_call = hrtimer_cpu_notify,    /* registered in hrtimers_init() */
+};
+
+void __init hrtimers_init(void)
+{
+       hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
+                         (void *)(long)smp_processor_id());     /* this CPU */
+       register_cpu_notifier(&hrtimers_nb);    /* later CPU events */
+#ifdef CONFIG_HIGH_RES_TIMERS
+       open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
+#endif
+}
+
+/**
+ * schedule_hrtimeout_range_clock - sleep until timeout
+ * @expires:   timeout value (ktime_t)
+ * @delta:     slack in expires timeout (ktime_t)
+ * @mode:      timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ * @clock:     timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
+ *
+ * Arms an on-stack timer for *@expires with slack @delta and calls
+ * schedule(). A zero timeout value returns immediately; a NULL
+ * @expires calls schedule() without arming a timer.
+ *
+ * The current task state is TASK_RUNNING when this routine returns.
+ *
+ * Returns 0 when the timer has expired or the timeout value is zero,
+ * otherwise -EINTR (this includes the NULL @expires case).
+ */
+int __sched
+schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
+                              const enum hrtimer_mode mode, int clock)
+{
+       struct hrtimer_sleeper t;
+
+       /*
+        * Optimize when a zero timeout value is given. It does not
+        * matter whether this is an absolute or a relative time.
+        */
+       if (expires && !expires->tv64) {
+               __set_current_state(TASK_RUNNING);
+               return 0;
+       }
+
+       /*
+        * A NULL parameter means "infinite"
+        */
+       if (!expires) {
+               schedule();
+               __set_current_state(TASK_RUNNING);
+               return -EINTR;
+       }
+
+       hrtimer_init_on_stack(&t.timer, clock, mode);
+       hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
+
+       hrtimer_init_sleeper(&t, current);
+
+       hrtimer_start_expires(&t.timer, mode);
+       if (!hrtimer_active(&t.timer))
+               t.task = NULL;  /* timer is not active: do not sleep */
+
+       if (likely(t.task))
+               schedule();
+
+       hrtimer_cancel(&t.timer);
+       destroy_hrtimer_on_stack(&t.timer);
+
+       __set_current_state(TASK_RUNNING);
+
+       return !t.task ? 0 : -EINTR;
+}
+
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires:   timeout value (ktime_t)
+ * @delta:     slack in expires timeout (ktime_t)
+ * @mode:      timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly.
+ * The kernel gives the normal best effort behavior for "@expires+@delta",
+ * but may decide to fire the timer earlier, but no earlier than @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least the time given by @expires is
+ * guaranteed to pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * This calls schedule_hrtimeout_range_clock() with CLOCK_MONOTONIC.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+                                    const enum hrtimer_mode mode)
+{
+       return schedule_hrtimeout_range_clock(expires, delta, mode,
+                                             CLOCK_MONOTONIC);
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
+
+/**
+ * schedule_hrtimeout - sleep until timeout
+ * @expires:   timeout value (ktime_t)
+ * @mode:      timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least the time given by @expires is
+ * guaranteed to pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * This calls schedule_hrtimeout_range() with a slack of zero.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout(ktime_t *expires,
+                              const enum hrtimer_mode mode)
+{
+       return schedule_hrtimeout_range(expires, 0, mode);
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout);
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
new file mode 100644 (file)
index 0000000..8d262b4
--- /dev/null
@@ -0,0 +1,301 @@
+/*
+ * linux/kernel/time/itimer.c
+ *
+ * Copyright (C) 1992 Darren Senn
+ */
+
+/* These are all the functions necessary to implement itimers */
+
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/syscalls.h>
+#include <linux/time.h>
+#include <linux/posix-timers.h>
+#include <linux/hrtimer.h>
+#include <trace/events/timer.h>
+
+#include <asm/uaccess.h>
+
+/**
+ * itimer_get_remtime - get remaining time for the timer
+ *
+ * @timer: the timer to read
+ *
+ * Returns the time left until @timer expires, converted to a timeval.
+ * An active timer whose remaining time is zero or negative (expired but
+ * still pending) is reported as 1 usec; an inactive timer is reported
+ * as 0.
+ */
+static struct timeval itimer_get_remtime(struct hrtimer *timer)
+{
+       ktime_t rem = hrtimer_get_remaining(timer);
+
+       /*
+        * Racy but safe: if the itimer expires after the above
+        * hrtimer_get_remaining() call but before this condition
+        * then we return 0 - which is correct.
+        */
+       if (hrtimer_active(timer)) {
+               if (rem.tv64 <= 0)
+                       rem.tv64 = NSEC_PER_USEC;
+       } else
+               rem.tv64 = 0;
+
+       return ktime_to_timeval(rem);
+}
+
+/*
+ * Read the state of the CPU itimer selected by @clock_id (CPUCLOCK_PROF or
+ * CPUCLOCK_VIRT) in @tsk's signal struct and store it in @value.
+ *
+ * The expiry and increment are sampled under siglock.  A non-zero expiry
+ * is turned into a remaining time by subtracting the thread group's
+ * current CPU time; if the expiry already lies behind that sample, one
+ * jiffy is reported instead.  A zero expiry is reported as zero.
+ */
+static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
+                          struct itimerval *const value)
+{
+       cputime_t cval, cinterval;
+       struct cpu_itimer *it = &tsk->signal->it[clock_id];
+
+       spin_lock_irq(&tsk->sighand->siglock);
+
+       cval = it->expires;
+       cinterval = it->incr;
+       if (cval) {
+               struct task_cputime cputime;
+               cputime_t t;
+
+               thread_group_cputimer(tsk, &cputime);
+               if (clock_id == CPUCLOCK_PROF)
+                       t = cputime.utime + cputime.stime;
+               else
+                       /* CPUCLOCK_VIRT */
+                       t = cputime.utime;
+
+               if (cval < t)
+                       /* about to fire */
+                       cval = cputime_one_jiffy;
+               else
+                       cval = cval - t;
+       }
+
+       spin_unlock_irq(&tsk->sighand->siglock);
+
+       /* The conversions work on local copies, so they run unlocked. */
+       cputime_to_timeval(cval, &value->it_value);
+       cputime_to_timeval(cinterval, &value->it_interval);
+}
+
+/*
+ * Fill @value with the current setting of the itimer @which of the
+ * current task.  Returns 0, or -EINVAL if @which is not a known itimer.
+ */
+int do_getitimer(int which, struct itimerval *value)
+{
+       struct task_struct *tsk = current;
+
+       if (which == ITIMER_REAL) {
+               /* Remaining time and interval are read under siglock. */
+               spin_lock_irq(&tsk->sighand->siglock);
+               value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
+               value->it_interval =
+                       ktime_to_timeval(tsk->signal->it_real_incr);
+               spin_unlock_irq(&tsk->sighand->siglock);
+               return 0;
+       }
+
+       if (which == ITIMER_VIRTUAL) {
+               get_cpu_itimer(tsk, CPUCLOCK_VIRT, value);
+               return 0;
+       }
+
+       if (which == ITIMER_PROF) {
+               get_cpu_itimer(tsk, CPUCLOCK_PROF, value);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * getitimer(2): read the itimer @which into a kernel buffer and copy it
+ * to @value.  Returns -EFAULT for a NULL or unwritable @value, otherwise
+ * whatever do_getitimer() returned.
+ */
+SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value)
+{
+       struct itimerval kbuf;
+       int ret;
+
+       /* A NULL destination is rejected before the timer is looked at. */
+       if (!value)
+               return -EFAULT;
+
+       ret = do_getitimer(which, &kbuf);
+       if (ret)
+               return ret;
+
+       if (copy_to_user(value, &kbuf, sizeof(kbuf)))
+               return -EFAULT;
+
+       return 0;
+}
+
+
+/*
+ * hrtimer callback of the ITIMER_REAL timer embedded in signal_struct:
+ * emit the expiry tracepoint and send SIGALRM to the leader pid.
+ *
+ * The callback itself always returns HRTIMER_NORESTART.  The timer is
+ * automagically restarted when interval != 0, but that re-arming is not
+ * done in this function.
+ */
+enum hrtimer_restart it_real_fn(struct hrtimer *timer)
+{
+       struct signal_struct *sig =
+               container_of(timer, struct signal_struct, real_timer);
+
+       trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
+       kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
+
+       return HRTIMER_NORESTART;
+}
+
+/*
+ * Return by how many nanoseconds the cputime value @ct exceeds @real_ns,
+ * or 0 if it does not exceed it.
+ */
+static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns)
+{
+       struct timespec as_ts;
+       s64 ct_ns;
+
+       cputime_to_timespec(ct, &as_ts);
+       ct_ns = timespec_to_ns(&as_ts);
+
+       if (ct_ns > real_ns)
+               return ct_ns - real_ns;
+
+       return 0;
+}
+
+/*
+ * Program the CPU itimer selected by @clock_id for @tsk from @value and,
+ * if @ovalue is not NULL, report the previous expiry and interval in it.
+ *
+ * it->error / it->incr_error record by how many nanoseconds the cputime
+ * representation of the new value / interval exceeds the requested
+ * timeval (0 if it does not exceed it).
+ */
+static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
+                          const struct itimerval *const value,
+                          struct itimerval *const ovalue)
+{
+       cputime_t cval, nval, cinterval, ninterval;
+       s64 ns_ninterval, ns_nval;
+       u32 error, incr_error;
+       struct cpu_itimer *it = &tsk->signal->it[clock_id];
+
+       /* All conversions are done up front, before siglock is taken. */
+       nval = timeval_to_cputime(&value->it_value);
+       ns_nval = timeval_to_ns(&value->it_value);
+       ninterval = timeval_to_cputime(&value->it_interval);
+       ns_ninterval = timeval_to_ns(&value->it_interval);
+
+       error = cputime_sub_ns(nval, ns_nval);
+       incr_error = cputime_sub_ns(ninterval, ns_ninterval);
+
+       spin_lock_irq(&tsk->sighand->siglock);
+
+       cval = it->expires;
+       cinterval = it->incr;
+       /* Nothing to (re)program if the timer was off and stays off. */
+       if (cval || nval) {
+               /* A non-zero new value is padded by one jiffy. */
+               if (nval > 0)
+                       nval += cputime_one_jiffy;
+               /*
+                * NOTE(review): nval and cval are passed by pointer, so
+                * set_process_cpu_timer() presumably rewrites them in
+                * place - confirm against its definition.
+                */
+               set_process_cpu_timer(tsk, clock_id, &nval, &cval);
+       }
+       it->expires = nval;
+       it->incr = ninterval;
+       it->error = error;
+       it->incr_error = incr_error;
+       trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
+                          ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
+
+       spin_unlock_irq(&tsk->sighand->siglock);
+
+       if (ovalue) {
+               cputime_to_timeval(cval, &ovalue->it_value);
+               cputime_to_timeval(cinterval, &ovalue->it_interval);
+       }
+}
+
+/*
+ * Returns true if the timeval is in canonical form: tv_sec is not
+ * negative and tv_usec is below USEC_PER_SEC.  tv_usec is compared as an
+ * unsigned long, so a negative tv_usec fails the check as well.
+ * Note that the macro argument is evaluated twice.
+ */
+#define timeval_valid(t) \
+       (((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))
+
+/*
+ * Arm, re-arm or disarm the itimer @which of the current task.
+ *
+ * @value:  new setting; both timevals must be in canonical form
+ * @ovalue: if not NULL, receives the previous setting
+ *
+ * Returns 0 on success, -EINVAL for a non-canonical timeval or an unknown
+ * @which.
+ */
+int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
+{
+       struct task_struct *tsk = current;
+       struct hrtimer *timer;
+       ktime_t expires;
+
+       /*
+        * Validate the timevals in value.
+        */
+       if (!timeval_valid(&value->it_value) ||
+           !timeval_valid(&value->it_interval))
+               return -EINVAL;
+
+       switch (which) {
+       case ITIMER_REAL:
+again:
+               spin_lock_irq(&tsk->sighand->siglock);
+               timer = &tsk->signal->real_timer;
+               if (ovalue) {
+                       ovalue->it_value = itimer_get_remtime(timer);
+                       ovalue->it_interval
+                               = ktime_to_timeval(tsk->signal->it_real_incr);
+               }
+               /*
+                * We are sharing ->siglock with it_real_fn().  A negative
+                * return means the timer could not be cancelled at this
+                * point (presumably its callback is running - confirm), so
+                * drop the lock and start over.
+                */
+               if (hrtimer_try_to_cancel(timer) < 0) {
+                       spin_unlock_irq(&tsk->sighand->siglock);
+                       goto again;
+               }
+               expires = timeval_to_ktime(value->it_value);
+               /* A zero it_value leaves the timer cancelled. */
+               if (expires.tv64 != 0) {
+                       tsk->signal->it_real_incr =
+                               timeval_to_ktime(value->it_interval);
+                       hrtimer_start(timer, expires, HRTIMER_MODE_REL);
+               } else
+                       tsk->signal->it_real_incr.tv64 = 0;
+
+               trace_itimer_state(ITIMER_REAL, value, 0);
+               spin_unlock_irq(&tsk->sighand->siglock);
+               break;
+       case ITIMER_VIRTUAL:
+               set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue);
+               break;
+       case ITIMER_PROF:
+               set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue);
+               break;
+       default:
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/**
+ * alarm_setitimer - set alarm in seconds
+ *
+ * @seconds:   number of seconds until alarm
+ *             0 disables the alarm
+ *
+ * Returns the remaining time in seconds of a pending timer or 0 when
+ * the timer is not active.  A non-zero remainder below one second is
+ * reported as 1, and a fractional part of half a second or more rounds
+ * the result up.
+ *
+ * On 32 bit machines the seconds value is limited to INT_MAX to avoid
+ * negative timeval settings which would cause immediate expiry.
+ */
+unsigned int alarm_setitimer(unsigned int seconds)
+{
+       struct itimerval it_new, it_old;
+
+#if BITS_PER_LONG < 64
+       if (seconds > INT_MAX)
+               seconds = INT_MAX;
+#endif
+       it_new.it_value.tv_sec = seconds;
+       it_new.it_value.tv_usec = 0;
+       it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+
+       /* The return value is not checked here. */
+       do_setitimer(ITIMER_REAL, &it_new, &it_old);
+
+       /*
+        * We can't return 0 if we have an alarm pending ...  And we'd
+        * better return too much than too little anyway
+        */
+       if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) ||
+             it_old.it_value.tv_usec >= 500000)
+               it_old.it_value.tv_sec++;
+
+       return it_old.it_value.tv_sec;
+}
+
+/*
+ * setitimer(2): copy the new setting in from @value, apply it through
+ * do_setitimer() and, if @ovalue is not NULL, copy the previous setting
+ * out.  A NULL @value is treated as an all-zero setting and triggers a
+ * one-time warning.
+ */
+SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
+               struct itimerval __user *, ovalue)
+{
+       struct itimerval new_val, old_val;
+       int ret;
+
+       if (!value) {
+               memset(&new_val, 0, sizeof(new_val));
+               printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
+                           " Misfeature support will be removed\n",
+                           current->comm);
+       } else if (copy_from_user(&new_val, value, sizeof(new_val))) {
+               return -EFAULT;
+       }
+
+       ret = do_setitimer(which, &new_val, ovalue ? &old_val : NULL);
+       if (ret)
+               return ret;
+
+       /* Nothing to report back when the caller passed no old-value buffer. */
+       if (!ovalue)
+               return 0;
+
+       return copy_to_user(ovalue, &old_val, sizeof(old_val)) ? -EFAULT : 0;
+}
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
new file mode 100644 (file)
index 0000000..3b89464
--- /dev/null
@@ -0,0 +1,1490 @@
+/*
+ * Implement CPU time clocks for the POSIX clock interface.
+ */
+
+#include <linux/sched.h>
+#include <linux/posix-timers.h>
+#include <linux/errno.h>
+#include <linux/math64.h>
+#include <asm/uaccess.h>
+#include <linux/kernel_stat.h>
+#include <trace/events/timer.h>
+#include <linux/random.h>
+#include <linux/tick.h>
+#include <linux/workqueue.h>
+
+/*
+ * Called after updating RLIMIT_CPU to run cpu timer and update
+ * tsk->signal->cputime_expires expiration cache if necessary. Needs
+ * siglock protection since other code may update expiration cache as
+ * well.
+ *
+ * @rlim_new is converted with secs_to_cputime() and handed to
+ * set_process_cpu_timer() for CPUCLOCK_PROF; no old value is requested.
+ */
+void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
+{
+       cputime_t cputime = secs_to_cputime(rlim_new);
+
+       spin_lock_irq(&task->sighand->siglock);
+       set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
+       spin_unlock_irq(&task->sighand->siglock);
+}
+
+/*
+ * Validate a CPU clock id.  Returns 0 if the clock type is known and the
+ * pid encoded in @which_clock is either 0 or names a task the caller may
+ * use for this kind of clock, -EINVAL otherwise.
+ */
+static int check_clock(const clockid_t which_clock)
+{
+       const pid_t pid = CPUCLOCK_PID(which_clock);
+       struct task_struct *task;
+       bool usable;
+
+       if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
+               return -EINVAL;
+
+       /* pid 0 needs no task lookup. */
+       if (pid == 0)
+               return 0;
+
+       rcu_read_lock();
+       task = find_task_by_vpid(pid);
+       if (!task)
+               usable = false;
+       else if (CPUCLOCK_PERTHREAD(which_clock))
+               /* Per-thread clock: the task must share our thread group. */
+               usable = same_thread_group(task, current);
+       else
+               /* Process clock: the task must be a group leader. */
+               usable = has_group_leader_pid(task);
+       rcu_read_unlock();
+
+       return usable ? 0 : -EINVAL;
+}
+
+/*
+ * Convert a timespec to a clock sample value for @which_clock:
+ * nanoseconds for CPUCLOCK_SCHED, cputime based expiry units otherwise.
+ */
+static inline unsigned long long
+timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
+{
+       if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
+               return (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
+
+       return cputime_to_expires(timespec_to_cputime(tp));
+}
+
+/*
+ * Convert a clock sample back to a timespec; the inverse direction of
+ * timespec_to_sample() for the same @which_clock.
+ */
+static void sample_to_timespec(const clockid_t which_clock,
+                              unsigned long long expires,
+                              struct timespec *tp)
+{
+       if (CPUCLOCK_WHICH(which_clock) != CPUCLOCK_SCHED) {
+               cputime_to_timespec((__force cputime_t)expires, tp);
+               return;
+       }
+
+       /* Scheduler clock samples are nanoseconds. */
+       *tp = ns_to_timespec(expires);
+}
+
+/*
+ * Update expiry time from increment, and increase overrun count,
+ * given the current clock sample.
+ *
+ * Nothing is done for a timer without increment or one that has not
+ * expired yet (@now < expires).  Otherwise expires is advanced by
+ * multiples of the increment, largest power-of-two multiple first, and
+ * it_overrun is increased by the number of increments added.
+ */
+static void bump_cpu_timer(struct k_itimer *timer,
+                          unsigned long long now)
+{
+       int i;
+       unsigned long long delta, incr;
+
+       if (timer->it.cpu.incr == 0)
+               return;
+
+       if (now < timer->it.cpu.expires)
+               return;
+
+       incr = timer->it.cpu.incr;
+       delta = now + incr - timer->it.cpu.expires;
+
+       /* Don't use (incr*2 < delta), incr*2 might overflow. */
+       /* Afterwards incr is the original increment shifted left by i. */
+       for (i = 0; incr < delta - incr; i++)
+               incr = incr << 1;
+
+       /*
+        * Walk back down, halving incr each round and consuming every
+        * multiple that still fits into delta.
+        * NOTE(review): "1 << i" is an int shift - confirm that i cannot
+        * reach the width of int for very small increments.
+        */
+       for (; i >= 0; incr >>= 1, i--) {
+               if (delta < incr)
+                       continue;
+
+               timer->it.cpu.expires += incr;
+               timer->it_overrun += 1 << i;
+               delta -= incr;
+       }
+}
+
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime:   The struct to compare.
+ *
+ * Returns 1 if utime, stime and sum_exec_runtime of @cputime are all
+ * zero, 0 as soon as one of them is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+       return !cputime->utime &&
+              !cputime->stime &&
+              !cputime->sum_exec_runtime;
+}
+
+/* Profiling clock sample of task @p: user plus system time. */
+static inline unsigned long long prof_ticks(struct task_struct *p)
+{
+       cputime_t user_time, sys_time;
+       cputime_t total;
+
+       task_cputime(p, &user_time, &sys_time);
+       total = user_time + sys_time;
+
+       return cputime_to_expires(total);
+}
+/* Virtual clock sample of task @p: user time only. */
+static inline unsigned long long virt_ticks(struct task_struct *p)
+{
+       cputime_t user_time;
+
+       /* The system time is not needed, so NULL is passed for it. */
+       task_cputime(p, &user_time, NULL);
+       return cputime_to_expires(user_time);
+}
+
+/*
+ * Report the resolution of a CPU clock in @tp: one nanosecond for
+ * CPUCLOCK_SCHED, one tick (1s/HZ, rounded up) for the other clocks.
+ * Returns the result of check_clock(); @tp is only written on success.
+ */
+static int
+posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
+{
+       int error = check_clock(which_clock);
+
+       if (error)
+               return error;
+
+       tp->tv_sec = 0;
+       if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
+               /*
+                * If sched_clock is using a cycle counter, we
+                * don't have any idea of its true resolution
+                * exported, but it is much finer than 1s/HZ.
+                */
+               tp->tv_nsec = 1;
+       } else {
+               tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
+       }
+
+       return 0;
+}
+
+/*
+ * CPU clocks cannot be set.  An invalid clock id is still reported with
+ * the error from check_clock(); a valid one always yields -EPERM.
+ */
+static int
+posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
+{
+       int error = check_clock(which_clock);
+
+       return error ? error : -EPERM;
+}
+
+
+/*
+ * Sample a per-thread clock for the given task.
+ * Returns 0 and stores the sample in @sample, or -EINVAL for an unknown
+ * clock type, in which case @sample is left untouched.
+ */
+static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
+                           unsigned long long *sample)
+{
+       unsigned long long ticks;
+
+       switch (CPUCLOCK_WHICH(which_clock)) {
+       case CPUCLOCK_SCHED:
+               ticks = task_sched_runtime(p);
+               break;
+       case CPUCLOCK_VIRT:
+               ticks = virt_ticks(p);
+               break;
+       case CPUCLOCK_PROF:
+               ticks = prof_ticks(p);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       *sample = ticks;
+       return 0;
+}
+
+/*
+ * Raise each field of @a to the corresponding field of @b where @b is
+ * larger; fields of @a are never lowered.
+ */
+static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
+{
+       if (a->utime < b->utime)
+               a->utime = b->utime;
+       if (a->stime < b->stime)
+               a->stime = b->stime;
+       if (a->sum_exec_runtime < b->sum_exec_runtime)
+               a->sum_exec_runtime = b->sum_exec_runtime;
+}
+
+/*
+ * Copy the thread group cputimer totals of @tsk's signal struct to @times.
+ *
+ * If the cputimer is not running yet, it is marked running and its
+ * totals are raised to at least the current thread_group_cputime() sums
+ * (update_gt_cputime() only ever increases a field) before being copied.
+ */
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+{
+       struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+       struct task_cputime sum;
+       unsigned long flags;
+
+       if (!cputimer->running) {
+               /*
+                * The POSIX timer interface allows for absolute time expiry
+                * values through the TIMER_ABSTIME flag, therefore we have
+                * to synchronize the timer to the clock every time we start
+                * it.
+                */
+               /* The group sums are collected before cputimer->lock is taken. */
+               thread_group_cputime(tsk, &sum);
+               raw_spin_lock_irqsave(&cputimer->lock, flags);
+               cputimer->running = 1;
+               update_gt_cputime(&cputimer->cputime, &sum);
+       } else
+               raw_spin_lock_irqsave(&cputimer->lock, flags);
+       /* Both branches arrive here with cputimer->lock held. */
+       *times = cputimer->cputime;
+       raw_spin_unlock_irqrestore(&cputimer->lock, flags);
+}
+
+/*
+ * Sample a process (thread group) clock for the given group_leader task.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
+ * Returns 0 with the sample in @sample, or -EINVAL for an unknown clock
+ * type, in which case the group times are not collected at all.
+ */
+static int cpu_clock_sample_group(const clockid_t which_clock,
+                                 struct task_struct *p,
+                                 unsigned long long *sample)
+{
+       const clockid_t which = CPUCLOCK_WHICH(which_clock);
+       struct task_cputime totals;
+
+       switch (which) {
+       case CPUCLOCK_PROF:
+       case CPUCLOCK_VIRT:
+       case CPUCLOCK_SCHED:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       thread_group_cputime(p, &totals);
+
+       if (which == CPUCLOCK_SCHED)
+               *sample = totals.sum_exec_runtime;
+       else if (which == CPUCLOCK_VIRT)
+               *sample = cputime_to_expires(totals.utime);
+       else
+               *sample = cputime_to_expires(totals.utime + totals.stime);
+
+       return 0;
+}
+
+/*
+ * Read the CPU clock @which_clock of @tsk into @tp.
+ *
+ * A per-thread clock is only sampled if @tsk is in the caller's thread
+ * group.  A process clock is only sampled, under the task's sighand
+ * lock, if @tsk is the caller itself or a thread group leader.
+ *
+ * Returns 0 on success.  Returns -EINVAL if those conditions are not
+ * met, if the sighand lock cannot be taken, or if the sample helper
+ * rejects the clock type; @tp is not written in that case.
+ */
+static int posix_cpu_clock_get_task(struct task_struct *tsk,
+                                   const clockid_t which_clock,
+                                   struct timespec *tp)
+{
+       int err = -EINVAL;
+       unsigned long long rtn;
+
+       if (CPUCLOCK_PERTHREAD(which_clock)) {
+               if (same_thread_group(tsk, current))
+                       err = cpu_clock_sample(which_clock, tsk, &rtn);
+       } else {
+               unsigned long flags;
+               struct sighand_struct *sighand;
+
+               /*
+                * while_each_thread() is not yet entirely RCU safe,
+                * keep locking the group while sampling process
+                * clock for now.
+                */
+               sighand = lock_task_sighand(tsk, &flags);
+               if (!sighand)
+                       return err;
+
+               if (tsk == current || thread_group_leader(tsk))
+                       err = cpu_clock_sample_group(which_clock, tsk, &rtn);
+
+               unlock_task_sighand(tsk, &flags);
+       }
+
+       /* rtn is only valid (and only read) when a sample helper succeeded. */
+       if (!err)
+               sample_to_timespec(which_clock, rtn, tp);
+
+       return err;
+}
+
+
+/*
+ * Read the CPU clock @which_clock into @tp.  The task is the caller for
+ * an encoded pid of 0, otherwise the task with that pid.  Returns
+ * -EINVAL if no such task is found, else the result of
+ * posix_cpu_clock_get_task().
+ */
+static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
+{
+       const pid_t pid = CPUCLOCK_PID(which_clock);
+       struct task_struct *p;
+       int err;
+
+       /*
+        * Special case constant value for our own clocks.
+        * We don't have to do any lookup to find ourselves.
+        */
+       if (pid == 0)
+               return posix_cpu_clock_get_task(current, which_clock, tp);
+
+       /*
+        * Find the given PID, and validate that the caller
+        * should be able to see it.
+        */
+       rcu_read_lock();
+       p = find_task_by_vpid(pid);
+       err = p ? posix_cpu_clock_get_task(p, which_clock, tp) : -EINVAL;
+       rcu_read_unlock();
+
+       return err;
+}
+
+
+/*
+ * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
+ * This is called from sys_timer_create() and do_cpu_nanosleep() with the
+ * new timer already all-zeros initialized.
+ *
+ * On success the target task is stored in new_timer->it.cpu.task with a
+ * reference taken on it, and 0 is returned.  Returns -EINVAL for an
+ * unknown clock type or if no suitable task is found.
+ */
+static int posix_cpu_timer_create(struct k_itimer *new_timer)
+{
+       int ret = 0;
+       const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
+       struct task_struct *p;
+
+       if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
+               return -EINVAL;
+
+       INIT_LIST_HEAD(&new_timer->it.cpu.entry);
+
+       rcu_read_lock();
+       if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
+               /* Thread timer: the caller, or a thread of the caller's group. */
+               if (pid == 0) {
+                       p = current;
+               } else {
+                       p = find_task_by_vpid(pid);
+                       if (p && !same_thread_group(p, current))
+                               p = NULL;
+               }
+       } else {
+               /* Process timer: the caller's group leader, or a leader by pid. */
+               if (pid == 0) {
+                       p = current->group_leader;
+               } else {
+                       p = find_task_by_vpid(pid);
+                       if (p && !has_group_leader_pid(p))
+                               p = NULL;
+               }
+       }
+       new_timer->it.cpu.task = p;
+       if (p) {
+               get_task_struct(p);
+       } else {
+               ret = -EINVAL;
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
+/*
+ * Clean up a CPU-clock timer that is about to be destroyed.
+ * This is called from timer deletion with the timer already locked.
+ * If we return TIMER_RETRY, it's necessary to release the timer's lock
+ * and try again.  (This happens when the timer is in the middle of firing.)
+ *
+ * Returns 0 after the timer has been unlinked (or the task was already
+ * reaped) and the task reference has been dropped.
+ */
+static int posix_cpu_timer_del(struct k_itimer *timer)
+{
+       int ret = 0;
+       unsigned long flags;
+       struct sighand_struct *sighand;
+       struct task_struct *p = timer->it.cpu.task;
+
+       WARN_ON_ONCE(p == NULL);
+
+       /*
+        * Protect against sighand release/switch in exit/exec and process/
+        * thread timer list entry concurrent read/writes.
+        */
+       sighand = lock_task_sighand(p, &flags);
+       if (unlikely(sighand == NULL)) {
+               /*
+                * We raced with the reaping of the task.
+                * The deletion should have cleared us off the list.
+                */
+               WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
+       } else {
+               if (timer->it.cpu.firing)
+                       ret = TIMER_RETRY;
+               else
+                       list_del(&timer->it.cpu.entry);
+
+               unlock_task_sighand(p, &flags);
+       }
+
+       /* The task reference is kept when the caller has to retry. */
+       if (!ret)
+               put_task_struct(p);
+
+       return ret;
+}
+
+/*
+ * Unlink every cpu_timer_list entry queued on @head and reinitialize its
+ * list node.  The _safe iterator is needed because entries are removed
+ * while walking the list.
+ */
+static void cleanup_timers_list(struct list_head *head)
+{
+       struct cpu_timer_list *timer, *next;
+
+       list_for_each_entry_safe(timer, next, head, entry)
+               list_del_init(&timer->entry);
+}
+
+/*
+ * Clean out CPU timers still ticking when a thread exited: every entry
+ * still queued on the three consecutive timer lists starting at @head is
+ * unlinked by cleanup_timers_list().
+ * This must be called with the siglock held.
+ */
+static void cleanup_timers(struct list_head *head)
+{
+       cleanup_timers_list(&head[0]);
+       cleanup_timers_list(&head[1]);
+       cleanup_timers_list(&head[2]);
+}
+
+/*
+ * These are both called with the siglock held, when the current thread
+ * is being reaped.  When the final (leader) thread in the group is reaped,
+ * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
+ *
+ * posix_cpu_timers_exit() feeds the task's sum_exec_runtime to
+ * add_device_randomness() and empties the per-thread timer lists.
+ */
+void posix_cpu_timers_exit(struct task_struct *tsk)
+{
+       add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
+                                               sizeof(unsigned long long));
+       cleanup_timers(tsk->cpu_timers);
+
+}
+/* Empty the process-wide CPU timer lists kept in @tsk's signal struct. */
+void posix_cpu_timers_exit_group(struct task_struct *tsk)
+{
+       cleanup_timers(tsk->signal->cpu_timers);
+}
+
+/*
+ * Returns nonzero if @expires is zero or lies after @new_exp, i.e. a zero
+ * @expires compares as later than any @new_exp.
+ */
+static inline int expires_gt(cputime_t expires, cputime_t new_exp)
+{
+       if (expires == 0)
+               return 1;
+
+       return expires > new_exp;
+}
+
+/*
+ * Insert the timer on the appropriate list before any timers that
+ * expire later.  This must be called with the sighand lock held.
+ *
+ * The list is the per-thread or the process-wide one, depending on the
+ * timer's clock, indexed by clock type.  If the timer ends up first on
+ * its list, the matching field of the expiration cache is lowered to the
+ * timer's expiry unless the cache already holds an earlier non-zero value.
+ */
+static void arm_timer(struct k_itimer *timer)
+{
+       struct task_struct *p = timer->it.cpu.task;
+       struct list_head *head, *listpos;
+       struct task_cputime *cputime_expires;
+       struct cpu_timer_list *const nt = &timer->it.cpu;
+       struct cpu_timer_list *next;
+
+       if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+               head = p->cpu_timers;
+               cputime_expires = &p->cputime_expires;
+       } else {
+               head = p->signal->cpu_timers;
+               cputime_expires = &p->signal->cputime_expires;
+       }
+       head += CPUCLOCK_WHICH(timer->it_clock);
+
+       /* Find the last entry that does not expire after the new timer. */
+       listpos = head;
+       list_for_each_entry(next, head, entry) {
+               if (nt->expires < next->expires)
+                       break;
+               listpos = &next->entry;
+       }
+       list_add(&nt->entry, listpos);
+
+       /* listpos still being head means the new timer was queued first. */
+       if (listpos == head) {
+               unsigned long long exp = nt->expires;
+
+               /*
+                * We are the new earliest-expiring POSIX 1.b timer, hence
+                * need to update expiration cache. Take into account that
+                * for process timers we share expiration cache with itimers
+                * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
+                */
+
+               switch (CPUCLOCK_WHICH(timer->it_clock)) {
+               case CPUCLOCK_PROF:
+                       if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
+                               cputime_expires->prof_exp = expires_to_cputime(exp);
+                       break;
+               case CPUCLOCK_VIRT:
+                       if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
+                               cputime_expires->virt_exp = expires_to_cputime(exp);
+                       break;
+               case CPUCLOCK_SCHED:
+                       if (cputime_expires->sched_exp == 0 ||
+                           cputime_expires->sched_exp > exp)
+                               cputime_expires->sched_exp = exp;
+                       break;
+               }
+       }
+}
+
+/*
+ * The timer is locked, fire it and arrange for its reload.
+ *
+ * In the first three cases below the expiry is cleared.  A periodic
+ * timer keeps its expiry and is only rescheduled here if
+ * posix_timer_event() returns nonzero.
+ */
+static void cpu_timer_fire(struct k_itimer *timer)
+{
+       if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
+               /*
+                * The user doesn't want any signal.
+                */
+               timer->it.cpu.expires = 0;
+       } else if (unlikely(timer->sigq == NULL)) {
+               /*
+                * This is a special case for clock_nanosleep,
+                * not a normal timer from sys_timer_create.
+                */
+               wake_up_process(timer->it_process);
+               timer->it.cpu.expires = 0;
+       } else if (timer->it.cpu.incr == 0) {
+               /*
+                * One-shot timer.  Clear it as soon as it's fired.
+                */
+               posix_timer_event(timer, 0);
+               timer->it.cpu.expires = 0;
+       } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
+               /*
+                * The signal did not get queued because the signal
+                * was ignored, so we won't get any callback to
+                * reload the timer.  But we need to keep it
+                * ticking in case the signal is deliverable next time.
+                */
+               posix_cpu_timer_schedule(timer);
+       }
+}
+
+/*
+ * Sample a process (thread group) timer for the given group_leader task.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
+ * The thread group cputimer is read before the clock type is checked;
+ * an unknown clock type yields -EINVAL with @sample left untouched.
+ */
+static int cpu_timer_sample_group(const clockid_t which_clock,
+                                 struct task_struct *p,
+                                 unsigned long long *sample)
+{
+       struct task_cputime totals;
+
+       thread_group_cputimer(p, &totals);
+
+       switch (CPUCLOCK_WHICH(which_clock)) {
+       case CPUCLOCK_SCHED:
+               *sample = totals.sum_exec_runtime + task_delta_exec(p);
+               return 0;
+       case CPUCLOCK_VIRT:
+               *sample = cputime_to_expires(totals.utime);
+               return 0;
+       case CPUCLOCK_PROF:
+               *sample = cputime_to_expires(totals.utime + totals.stime);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+#ifdef CONFIG_NO_HZ_FULL
+/* Work item body: calls tick_nohz_full_kick_all() from workqueue context. */
+static void nohz_kick_work_fn(struct work_struct *work)
+{
+       tick_nohz_full_kick_all();
+}
+
+static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
+
+/*
+ * We need the IPIs to be sent from sane process context.
+ * The posix cpu timers are always set with irqs disabled.
+ * Hence the kick is deferred to a work item, and only scheduled when
+ * context tracking is enabled.
+ */
+static void posix_cpu_timer_kick_nohz(void)
+{
+       if (context_tracking_is_enabled())
+               schedule_work(&nohz_kick_work);
+}
+
+/*
+ * Returns false if @tsk has any non-zero field in its cputime_expires
+ * cache or if the thread group cputimer of its signal struct is running;
+ * true otherwise.
+ */
+bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
+{
+       if (!task_cputime_zero(&tsk->cputime_expires))
+               return false;
+
+       if (tsk->signal->cputimer.running)
+               return false;
+
+       return true;
+}
+#else
+/* Without CONFIG_NO_HZ_FULL the kick is a no-op. */
+static inline void posix_cpu_timer_kick_nohz(void) { }
+#endif
+
+/*
+ * Guts of sys_timer_settime for CPU timers.
+ * This is called with the timer locked and interrupts disabled.
+ * If we return TIMER_RETRY, it's necessary to release the timer's lock
+ * and try again.  (This happens when the timer is in the middle of firing.)
+ */
+static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
+                              struct itimerspec *new, struct itimerspec *old)
+{
+       unsigned long flags;
+       struct sighand_struct *sighand;
+       struct task_struct *p = timer->it.cpu.task;
+       unsigned long long old_expires, new_expires, old_incr, val;
+       int ret;
+
+       WARN_ON_ONCE(p == NULL);
+
+       new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
+
+       /*
+        * Protect against sighand release/switch in exit/exec and p->cpu_timers
+        * and p->signal->cpu_timers read/write in arm_timer()
+        */
+       sighand = lock_task_sighand(p, &flags);
+       /*
+        * If p has just been reaped, we can no
+        * longer get any information about it at all.
+        */
+       if (unlikely(sighand == NULL)) {
+               return -ESRCH;
+       }
+
+       /*
+        * Disarm any old timer after extracting its expiry time.
+        */
+       WARN_ON_ONCE(!irqs_disabled());
+
+       ret = 0;
+       old_incr = timer->it.cpu.incr;
+       old_expires = timer->it.cpu.expires;
+       if (unlikely(timer->it.cpu.firing)) {
+               timer->it.cpu.firing = -1;
+               ret = TIMER_RETRY;
+       } else
+               list_del_init(&timer->it.cpu.entry);
+
+       /*
+        * We need to sample the current value to convert the new
+        * value from relative to absolute, and to convert the
+        * old value from absolute to relative.  To set a process
+        * timer, we need a sample to balance the thread expiry
+        * times (in arm_timer).  With an absolute time, we must
+        * check if it's already passed.  In short, we need a sample.
+        */
+       if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+               cpu_clock_sample(timer->it_clock, p, &val);
+       } else {
+               cpu_timer_sample_group(timer->it_clock, p, &val);
+       }
+
+       if (old) {
+               if (old_expires == 0) {
+                       old->it_value.tv_sec = 0;
+                       old->it_value.tv_nsec = 0;
+               } else {
+                       /*
+                        * Update the timer in case it has
+                        * overrun already.  If it has,
+                        * we'll report it as having overrun
+                        * and with the next reloaded timer
+                        * already ticking, though we are
+                        * swallowing that pending
+                        * notification here to install the
+                        * new setting.
+                        */
+                       bump_cpu_timer(timer, val);
+                       if (val < timer->it.cpu.expires) {
+                               old_expires = timer->it.cpu.expires - val;
+                               sample_to_timespec(timer->it_clock,
+                                                  old_expires,
+                                                  &old->it_value);
+                       } else {
+                               old->it_value.tv_nsec = 1;
+                               old->it_value.tv_sec = 0;
+                       }
+               }
+       }
+
+       if (unlikely(ret)) {
+               /*
+                * We are colliding with the timer actually firing.
+                * Punt after filling in the timer's old value, and
+                * disable this firing since we are already reporting
+                * it as an overrun (thanks to bump_cpu_timer above).
+                */
+               unlock_task_sighand(p, &flags);
+               goto out;
+       }
+
+       if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
+               new_expires += val;
+       }
+
+       /*
+        * Install the new expiry time (or zero).
+        * For a timer with no notification action, we don't actually
+        * arm the timer (we'll just fake it for timer_gettime).
+        */
+       timer->it.cpu.expires = new_expires;
+       if (new_expires != 0 && val < new_expires) {
+               arm_timer(timer);
+       }
+
+       unlock_task_sighand(p, &flags);
+       /*
+        * Install the new reload setting, and
+        * set up the signal and overrun bookkeeping.
+        */
+       timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
+                                               &new->it_interval);
+
+       /*
+        * This acts as a modification timestamp for the timer,
+        * so any automatic reload attempt will punt on seeing
+        * that we have reset the timer manually.
+        */
+       timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
+               ~REQUEUE_PENDING;
+       timer->it_overrun_last = 0;
+       timer->it_overrun = -1;
+
+       if (new_expires != 0 && !(val < new_expires)) {
+               /*
+                * The designated time already passed, so we notify
+                * immediately, even if the thread never runs to
+                * accumulate more time on this clock.
+                */
+               cpu_timer_fire(timer);
+       }
+
+       ret = 0;
+ out:
+       if (old) {
+               sample_to_timespec(timer->it_clock,
+                                  old_incr, &old->it_interval);
+       }
+       if (!ret)
+               posix_cpu_timer_kick_nohz();
+       return ret;
+}
+
+/*
+ * Report a CPU timer's reload interval and the time left until it expires.
+ *
+ * @timer: the timer to query; timer->it.cpu.task is expected to be non-NULL.
+ * @itp:   receives the reload interval in it_interval and the remaining
+ *         time in it_value.  it_value is zero when the timer is disarmed
+ *         (including when the target process has already been reaped).
+ */
+static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
+{
+       unsigned long long now;
+       struct task_struct *p = timer->it.cpu.task;
+
+       WARN_ON_ONCE(p == NULL);
+
+       /*
+        * Easy part: convert the reload time.
+        */
+       sample_to_timespec(timer->it_clock,
+                          timer->it.cpu.incr, &itp->it_interval);
+
+       if (timer->it.cpu.expires == 0) {       /* Timer not armed at all.  */
+               itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
+               return;
+       }
+
+       /*
+        * Sample the clock to take the difference with the expiry time.
+        */
+       if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+               cpu_clock_sample(timer->it_clock, p, &now);
+       } else {
+               struct sighand_struct *sighand;
+               unsigned long flags;
+
+               /*
+                * Protect against sighand release/switch in exit/exec and
+                * also make timer sampling safe if it ends up calling
+                * thread_group_cputime().
+                */
+               sighand = lock_task_sighand(p, &flags);
+               if (unlikely(sighand == NULL)) {
+                       /*
+                        * The process has been reaped.
+                        * We can't even collect a sample any more.
+                        * Call the timer disarmed, nothing else to do.
+                        */
+                       timer->it.cpu.expires = 0;
+                       sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
+                                          &itp->it_value);
+                       /*
+                        * No sample was taken, so "now" is uninitialized.
+                        * Falling through would compare it against the
+                        * zeroed expiry and overwrite the disarmed value
+                        * just reported with "about to expire".
+                        */
+                       return;
+               } else {
+                       cpu_timer_sample_group(timer->it_clock, p, &now);
+                       unlock_task_sighand(p, &flags);
+               }
+       }
+
+       if (now < timer->it.cpu.expires) {
+               sample_to_timespec(timer->it_clock,
+                                  timer->it.cpu.expires - now,
+                                  &itp->it_value);
+       } else {
+               /*
+                * The timer should have expired already, but the firing
+                * hasn't taken place yet.  Say it's just about to expire.
+                */
+               itp->it_value.tv_nsec = 1;
+               itp->it_value.tv_sec = 0;
+       }
+}
+
+/*
+ * Walk @timers from the front, flagging each entry whose expiry is not
+ * later than @curr as firing and moving it to the tail of @firing.
+ *
+ * The walk stops at the first entry that has not expired yet, or once the
+ * per-call budget runs out (the counter starts at 20 and is decremented
+ * before each entry is examined, so at most 19 entries are moved).
+ *
+ * Returns the expiry of the entry the walk stopped at, or 0 when the list
+ * has been emptied.
+ */
+static unsigned long long
+check_timers_list(struct list_head *timers,
+                 struct list_head *firing,
+                 unsigned long long curr)
+{
+       int budget = 20;
+
+       for (;;) {
+               struct cpu_timer_list *head;
+
+               if (list_empty(timers))
+                       return 0;
+
+               head = list_first_entry(timers, struct cpu_timer_list, entry);
+
+               budget--;
+               if (budget == 0 || curr < head->expires)
+                       return head->expires;
+
+               head->firing = 1;
+               list_move_tail(&head->entry, firing);
+       }
+}
+
+/*
+ * Check for any per-thread CPU timers that have fired and move them off
+ * the tsk->cpu_timers[N] list onto the firing list.  Here we update the
+ * tsk->cputime_expires values to reflect the remaining thread CPU timers.
+ *
+ * The three lists are walked in order against prof_ticks(tsk),
+ * virt_ticks(tsk) and tsk->se.sum_exec_runtime respectively.
+ *
+ * RLIMIT_RTTIME is also checked here against tsk->rt.timeout: SIGKILL is
+ * sent once the hard limit is exceeded, SIGXCPU once the soft limit is.
+ */
+static void check_thread_timers(struct task_struct *tsk,
+                               struct list_head *firing)
+{
+       struct list_head *timers = tsk->cpu_timers;
+       struct signal_struct *const sig = tsk->signal;
+       struct task_cputime *tsk_expires = &tsk->cputime_expires;
+       unsigned long long expires;
+       unsigned long soft;
+
+       expires = check_timers_list(timers, firing, prof_ticks(tsk));
+       tsk_expires->prof_exp = expires_to_cputime(expires);
+
+       expires = check_timers_list(++timers, firing, virt_ticks(tsk));
+       tsk_expires->virt_exp = expires_to_cputime(expires);
+
+       tsk_expires->sched_exp = check_timers_list(++timers, firing,
+                                                  tsk->se.sum_exec_runtime);
+
+       /*
+        * Check for the special case thread timers.
+        */
+       soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
+       if (soft != RLIM_INFINITY) {
+               unsigned long hard =
+                       ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
+
+               if (hard != RLIM_INFINITY &&
+                   tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
+                       /*
+                        * At the hard limit, we just die.
+                        * No need to calculate anything else now.
+                        */
+                       __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+                       return;
+               }
+               if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
+                       /*
+                        * At the soft limit, send a SIGXCPU every second.
+                        * While still below the hard limit, the stored soft
+                        * limit is pushed out by USEC_PER_SEC so that the
+                        * next signal is due one second later.
+                        */
+                       if (soft < hard) {
+                               soft += USEC_PER_SEC;
+                               sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
+                       }
+                       printk(KERN_INFO
+                               "RT Watchdog Timeout: %s[%d]\n",
+                               tsk->comm, task_pid_nr(tsk));
+                       __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
+               }
+       }
+}
+
+/*
+ * Mark the thread group's cputimer as no longer running.  The flag is
+ * cleared under the cputimer lock with interrupts disabled.
+ */
+static void stop_process_timers(struct signal_struct *sig)
+{
+       unsigned long irqflags;
+
+       raw_spin_lock_irqsave(&sig->cputimer.lock, irqflags);
+       sig->cputimer.running = 0;
+       raw_spin_unlock_irqrestore(&sig->cputimer.lock, irqflags);
+}
+
+static u32 onecputick;
+
+/*
+ * Process one of the process-wide CPU interval timers.
+ *
+ * @tsk:      task whose thread group receives the signal.
+ * @it:       the interval timer to check; a zero it->expires means unarmed.
+ * @expires:  in/out earliest known expiry for this clock; lowered to
+ *            it->expires when the timer stays armed and is earlier (or when
+ *            *expires is still zero).
+ * @cur_time: current sample of the clock the timer runs on.
+ * @signo:    signal to send on expiry (SIGPROF selects ITIMER_PROF for the
+ *            trace event, anything else ITIMER_VIRTUAL).
+ *
+ * On expiry a periodic timer is advanced by it->incr, with it->error
+ * accumulating it->incr_error; once the accumulated error reaches
+ * onecputick the expiry is pulled in by cputime_one_jiffy.  A one-shot
+ * timer is disarmed.
+ */
+static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
+                            unsigned long long *expires,
+                            unsigned long long cur_time, int signo)
+{
+       /* Nothing to do for an unarmed timer. */
+       if (!it->expires)
+               return;
+
+       if (cur_time >= it->expires) {
+               if (!it->incr) {
+                       /* One-shot: disarm. */
+                       it->expires = 0;
+               } else {
+                       /* Periodic: reload and compensate rounding error. */
+                       it->expires += it->incr;
+                       it->error += it->incr_error;
+                       if (it->error >= onecputick) {
+                               it->expires -= cputime_one_jiffy;
+                               it->error -= onecputick;
+                       }
+               }
+
+               trace_itimer_expire(signo == SIGPROF ?
+                                   ITIMER_PROF : ITIMER_VIRTUAL,
+                                   tsk->signal->leader_pid, cur_time);
+               __group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
+       }
+
+       /* A timer that is (now) disarmed does not affect the next expiry. */
+       if (!it->expires)
+               return;
+
+       if (*expires == 0 || *expires > it->expires)
+               *expires = it->expires;
+}
+
+/*
+ * Check for any process-wide CPU timers that have fired and move them
+ * off the tsk->signal->cpu_timers[N] lists onto the firing list.
+ * Per-thread timers have already been taken off.
+ *
+ * Also handles the process-wide interval timers (SIGPROF / SIGVTALRM) and
+ * RLIMIT_CPU, then stores the next expiry for each clock in
+ * sig->cputime_expires.  If nothing is left pending, the thread group
+ * cputimer is stopped.
+ */
+static void check_process_timers(struct task_struct *tsk,
+                                struct list_head *firing)
+{
+       struct signal_struct *const sig = tsk->signal;
+       unsigned long long utime, ptime, virt_expires, prof_expires;
+       unsigned long long sum_sched_runtime, sched_expires;
+       struct list_head *timers = sig->cpu_timers;
+       struct task_cputime cputime;
+       unsigned long soft;
+
+       /*
+        * Collect the current process totals.
+        * ptime (user + system) drives the PROF clock, utime the VIRT clock.
+        */
+       thread_group_cputimer(tsk, &cputime);
+       utime = cputime_to_expires(cputime.utime);
+       ptime = utime + cputime_to_expires(cputime.stime);
+       sum_sched_runtime = cputime.sum_exec_runtime;
+
+       prof_expires = check_timers_list(timers, firing, ptime);
+       virt_expires = check_timers_list(++timers, firing, utime);
+       sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
+
+       /*
+        * Check for the special case process timers.
+        */
+       check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
+                        SIGPROF);
+       check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
+                        SIGVTALRM);
+       soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+       if (soft != RLIM_INFINITY) {
+               unsigned long psecs = cputime_to_secs(ptime);
+               unsigned long hard =
+                       ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
+               cputime_t x;
+               if (psecs >= hard) {
+                       /*
+                        * At the hard limit, we just die.
+                        * No need to calculate anything else now.
+                        */
+                       __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+                       return;
+               }
+               if (psecs >= soft) {
+                       /*
+                        * At the soft limit, send a SIGXCPU every second.
+                        * While below the hard limit the stored soft limit
+                        * is bumped by one second for the next round.
+                        */
+                       __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
+                       if (soft < hard) {
+                               soft++;
+                               sig->rlim[RLIMIT_CPU].rlim_cur = soft;
+                       }
+               }
+               x = secs_to_cputime(soft);      /* soft limit as a PROF expiry */
+               if (!prof_expires || x < prof_expires) {
+                       prof_expires = x;
+               }
+       }
+
+       sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
+       sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
+       sig->cputime_expires.sched_exp = sched_expires;
+       if (task_cputime_zero(&sig->cputime_expires))
+               stop_process_timers(sig);
+}
+
+/*
+ * This is called from the signal code (via do_schedule_next_timer)
+ * when the last timer signal was delivered and we have to reload the timer.
+ *
+ * The timer's expiry is bumped past the current clock sample and the timer
+ * is re-armed, unless the target task/process is gone or exiting.  In every
+ * case the overrun bookkeeping at the "out" label is updated.
+ */
+void posix_cpu_timer_schedule(struct k_itimer *timer)
+{
+       struct sighand_struct *sighand;
+       unsigned long flags;
+       struct task_struct *p = timer->it.cpu.task;
+       unsigned long long now;
+
+       WARN_ON_ONCE(p == NULL);
+
+       /*
+        * Fetch the current sample and update the timer's expiry time.
+        */
+       if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+               cpu_clock_sample(timer->it_clock, p, &now);
+               bump_cpu_timer(timer, now);
+               if (unlikely(p->exit_state))
+                       goto out;
+
+               /* Protect timer list r/w in arm_timer() */
+               sighand = lock_task_sighand(p, &flags);
+               if (!sighand)
+                       goto out;
+       } else {
+               /*
+                * Protect arm_timer() and timer sampling in case of call to
+                * thread_group_cputime().
+                */
+               sighand = lock_task_sighand(p, &flags);
+               if (unlikely(sighand == NULL)) {
+                       /*
+                        * The process has been reaped.
+                        * We can't even collect a sample any more.
+                        */
+                       timer->it.cpu.expires = 0;
+                       goto out;
+               } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
+                       unlock_task_sighand(p, &flags);
+                       /* Optimizations: if the process is dying, no need to rearm */
+                       goto out;
+               }
+               cpu_timer_sample_group(timer->it_clock, p, &now);
+               bump_cpu_timer(timer, now);
+               /* Leave the sighand locked for the call below.  */
+       }
+
+       /*
+        * Now re-arm for the new expiry time.
+        * Both branches above arrive here with the sighand lock held.
+        */
+       WARN_ON_ONCE(!irqs_disabled());
+       arm_timer(timer);
+       unlock_task_sighand(p, &flags);
+
+       /* Kick full dynticks CPUs in case they need to tick on the new timer */
+       posix_cpu_timer_kick_nohz();
+out:
+       timer->it_overrun_last = timer->it_overrun;
+       timer->it_overrun = -1;
+       ++timer->it_requeue_pending;
+}
+
+/**
+ * task_cputime_expired - Check a cputime sample against expiration times.
+ *
+ * @sample:    The task_cputime structure to be checked for expiration.
+ * @expires:   Expiration times, against which @sample will be checked.
+ *
+ * An expiration field that is zero is treated as "not set" and ignored.
+ * @expires->utime is compared with @sample->utime, @expires->stime with the
+ * sum @sample->utime + @sample->stime, and @expires->sum_exec_runtime with
+ * @sample->sum_exec_runtime.
+ *
+ * Returns 1 if any set expiration has been reached (sample value greater
+ * than or equal to it), otherwise 0.
+ */
+static inline int task_cputime_expired(const struct task_cputime *sample,
+                                       const struct task_cputime *expires)
+{
+       const int user_hit = expires->utime &&
+                            sample->utime >= expires->utime;
+       const int total_hit = expires->stime &&
+                             sample->utime + sample->stime >= expires->stime;
+       const int runtime_hit = expires->sum_exec_runtime != 0 &&
+                               sample->sum_exec_runtime >= expires->sum_exec_runtime;
+
+       return user_hit || total_hit || runtime_hit;
+}
+
+/**
+ * fastpath_timer_check - POSIX CPU timers fast path.
+ *
+ * @tsk:       The task (thread) being checked.
+ *
+ * First compares a snapshot of the task's own CPU times with its expiration
+ * cache, if that cache holds anything.  Then, if the thread group cputimer
+ * is running, compares a snapshot of the group totals (copied under the
+ * cputimer lock) with the group expiration cache.
+ *
+ * Returns 1 if either comparison reports an expired timer, otherwise 0.
+ */
+static inline int fastpath_timer_check(struct task_struct *tsk)
+{
+       struct task_cputime group_sample;
+       struct signal_struct *sig;
+       cputime_t utime, stime;
+
+       task_cputime(tsk, &utime, &stime);
+
+       /* Per-thread timers. */
+       if (!task_cputime_zero(&tsk->cputime_expires)) {
+               struct task_cputime task_sample = {
+                       .utime = utime,
+                       .stime = stime,
+                       .sum_exec_runtime = tsk->se.sum_exec_runtime
+               };
+
+               if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+                       return 1;
+       }
+
+       /* Process-wide timers: only relevant while the cputimer runs. */
+       sig = tsk->signal;
+       if (!sig->cputimer.running)
+               return 0;
+
+       raw_spin_lock(&sig->cputimer.lock);
+       group_sample = sig->cputimer.cputime;
+       raw_spin_unlock(&sig->cputimer.lock);
+
+       if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+               return 1;
+
+       return 0;
+}
+
+/*
+ * This is called from the timer interrupt handler.  The irq handler has
+ * already updated our counts.  We need to check if any timers fire now.
+ * Interrupts are disabled.
+ *
+ * @tsk: the task whose per-thread and process-wide CPU timers are checked.
+ *
+ * Expired timers are first collected on a private list under the sighand
+ * lock, then fired one by one under each timer's own lock.
+ */
+void run_posix_cpu_timers(struct task_struct *tsk)
+{
+       LIST_HEAD(firing);
+       struct k_itimer *timer, *next;
+       unsigned long flags;
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       /*
+        * The fast path checks that there are no expired thread or thread
+        * group timers.  If that's so, just return.
+        */
+       if (!fastpath_timer_check(tsk))
+               return;
+
+       if (!lock_task_sighand(tsk, &flags))
+               return;
+       /*
+        * Here we take off tsk->signal->cpu_timers[N] and
+        * tsk->cpu_timers[N] all the timers that are firing, and
+        * put them on the firing list.
+        */
+       check_thread_timers(tsk, &firing);
+       /*
+        * If there are any active process wide timers (POSIX 1.b, itimers,
+        * RLIMIT_CPU) cputimer must be running.
+        */
+       if (tsk->signal->cputimer.running)
+               check_process_timers(tsk, &firing);
+
+       /*
+        * We must release these locks before taking any timer's lock.
+        * There is a potential race with timer deletion here, as the
+        * siglock now protects our private firing list.  We have set
+        * the firing flag in each timer, so that a deletion attempt
+        * that gets the timer lock before we do will give it up and
+        * spin until we've taken care of that timer below.
+        */
+       unlock_task_sighand(tsk, &flags);
+
+       /*
+        * Now that all the timers on our list have the firing flag,
+        * no one will touch their list entries but us.  We'll take
+        * each timer's lock before clearing its firing flag, so no
+        * timer call will interfere.
+        */
+       list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
+               int cpu_firing;
+
+               spin_lock(&timer->it_lock);
+               list_del_init(&timer->it.cpu.entry);
+               cpu_firing = timer->it.cpu.firing;
+               timer->it.cpu.firing = 0;
+               /*
+                * The firing flag is -1 if we collided with a reset
+                * of the timer, which already reported this
+                * almost-firing as an overrun.  So don't generate an event.
+                */
+               if (likely(cpu_firing >= 0))
+                       cpu_timer_fire(timer);
+               spin_unlock(&timer->it_lock);
+       }
+}
+
+/*
+ * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
+ * The tsk->sighand->siglock must be held by the caller.
+ *
+ * @tsk:       task whose thread group is affected.
+ * @clock_idx: CPUCLOCK_PROF or CPUCLOCK_VIRT (CPUCLOCK_SCHED triggers a
+ *             warning).
+ * @newval:    new expiry.  When @oldval is non-NULL it is taken as relative
+ *             and rewritten in place as absolute; otherwise it is used as
+ *             given.
+ * @oldval:    if non-NULL, the previous absolute expiry, rewritten in
+ *             place as relative to the current sample.
+ */
+void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
+                          cputime_t *newval, cputime_t *oldval)
+{
+       unsigned long long now;
+
+       WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
+       cpu_timer_sample_group(clock_idx, tsk, &now);
+
+       if (oldval) {
+               /*
+                * We are setting itimer. The *oldval is absolute and we update
+                * it to be relative, *newval argument is relative and we update
+                * it to be absolute.
+                */
+               if (*oldval) {
+                       if (*oldval <= now) {
+                               /* Just about to fire. */
+                               *oldval = cputime_one_jiffy;
+                       } else {
+                               *oldval -= now;
+                       }
+               }
+
+               if (!*newval)   /* timer is being disarmed: cache untouched */
+                       goto out;
+               *newval += now;
+       }
+
+       /*
+        * Update expiration cache if we are the earliest timer, or eventually
+        * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
+        */
+       switch (clock_idx) {
+       case CPUCLOCK_PROF:
+               if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
+                       tsk->signal->cputime_expires.prof_exp = *newval;
+               break;
+       case CPUCLOCK_VIRT:
+               if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
+                       tsk->signal->cputime_expires.virt_exp = *newval;
+               break;
+       }
+out:
+       posix_cpu_timer_kick_nohz();
+}
+
+/*
+ * Sleep on a CPU clock by arming a temporary on-stack timer and blocking
+ * until it fires or a signal becomes pending.
+ *
+ * @which_clock: CPU clock to sleep on.
+ * @flags:       passed to posix_cpu_timer_set() (e.g. TIMER_ABSTIME).
+ * @rqtp:        requested time; on signal interruption it is overwritten
+ *               with the timer's expiry value for use by a restart.
+ * @it:          scratch itimerspec; on signal interruption it->it_value
+ *               holds the time that was still remaining.
+ *
+ * Returns 0 if the timer fired, -ERESTART_RESTARTBLOCK if a signal
+ * interrupted the sleep before expiry, or the error from timer creation
+ * or arming.
+ */
+static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
+                           struct timespec *rqtp, struct itimerspec *it)
+{
+       struct k_itimer timer;
+       int error;
+
+       /*
+        * Set up a temporary timer and then wait for it to go off.
+        */
+       memset(&timer, 0, sizeof timer);
+       spin_lock_init(&timer.it_lock);
+       timer.it_clock = which_clock;
+       timer.it_overrun = -1;
+       error = posix_cpu_timer_create(&timer);
+       timer.it_process = current;
+       if (!error) {
+               static struct itimerspec zero_it;
+
+               memset(it, 0, sizeof *it);
+               it->it_value = *rqtp;
+
+               spin_lock_irq(&timer.it_lock);
+               error = posix_cpu_timer_set(&timer, flags, it, NULL);
+               if (error) {
+                       spin_unlock_irq(&timer.it_lock);
+                       return error;
+               }
+
+               while (!signal_pending(current)) {
+                       if (timer.it.cpu.expires == 0) {
+                               /*
+                                * Our timer fired and was reset, below
+                                * deletion can not fail.
+                                */
+                               posix_cpu_timer_del(&timer);
+                               spin_unlock_irq(&timer.it_lock);
+                               return 0;
+                       }
+
+                       /*
+                        * Block until cpu_timer_fire (or a signal) wakes us.
+                        */
+                       __set_current_state(TASK_INTERRUPTIBLE);
+                       spin_unlock_irq(&timer.it_lock);
+                       schedule();
+                       spin_lock_irq(&timer.it_lock);
+               }
+
+               /*
+                * We were interrupted by a signal.
+                * Save the expiry in *rqtp, then disarm the timer while
+                * collecting the remaining time into *it.
+                */
+               sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
+               error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
+               if (!error) {
+                       /*
+                        * Timer is now unarmed, deletion can not fail.
+                        */
+                       posix_cpu_timer_del(&timer);
+               }
+               spin_unlock_irq(&timer.it_lock);
+
+               while (error == TIMER_RETRY) {
+                       /*
+                        * We need to handle case when timer was or is in the
+                        * middle of firing. In other cases we already freed
+                        * resources.
+                        */
+                       spin_lock_irq(&timer.it_lock);
+                       error = posix_cpu_timer_del(&timer);
+                       spin_unlock_irq(&timer.it_lock);
+               }
+
+               if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
+                       /*
+                        * It actually did fire already.
+                        */
+                       return 0;
+               }
+
+               error = -ERESTART_RESTARTBLOCK;
+       }
+
+       return error;
+}
+
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
+
+/*
+ * nsleep handler for CPU clocks.
+ *
+ * @which_clock: CPU clock to sleep on.
+ * @flags:       TIMER_ABSTIME or 0.
+ * @rqtp:        requested sleep time.
+ * @rmtp:        optional user pointer that receives the remaining time
+ *               when a relative sleep is interrupted by a signal.
+ *
+ * Returns -EINVAL for a per-thread clock that refers to the calling thread
+ * (PID 0 or current->pid), -ERESTARTNOHAND for an interrupted absolute
+ * sleep, -EFAULT if the remaining time cannot be copied out, and otherwise
+ * the result of do_cpu_nanosleep().  For an interrupted relative sleep the
+ * restart block is set up to resume via posix_cpu_nsleep_restart().
+ */
+static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
+                           struct timespec *rqtp, struct timespec __user *rmtp)
+{
+       struct restart_block *restart_block =
+               &current_thread_info()->restart_block;
+       struct itimerspec it;
+       int error;
+
+       /*
+        * Diagnose required errors first.
+        */
+       if (CPUCLOCK_PERTHREAD(which_clock) &&
+           (CPUCLOCK_PID(which_clock) == 0 ||
+            CPUCLOCK_PID(which_clock) == current->pid))
+               return -EINVAL;
+
+       error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
+
+       if (error == -ERESTART_RESTARTBLOCK) {
+
+               if (flags & TIMER_ABSTIME)
+                       return -ERESTARTNOHAND;
+               /*
+                * Report back to the user the time still remaining.
+                */
+               if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+                       return -EFAULT;
+
+               /* *rqtp was rewritten by do_cpu_nanosleep() for the restart. */
+               restart_block->fn = posix_cpu_nsleep_restart;
+               restart_block->nanosleep.clockid = which_clock;
+               restart_block->nanosleep.rmtp = rmtp;
+               restart_block->nanosleep.expires = timespec_to_ns(rqtp);
+       }
+       return error;
+}
+
+/*
+ * Resume an interrupted CPU-clock sleep from the saved restart block.
+ *
+ * The saved expiry is slept on as an absolute time (TIMER_ABSTIME).  If the
+ * sleep is interrupted again, the remaining time is copied to the saved
+ * user pointer (when there is one) and the restart block's expiry is
+ * refreshed from the value do_cpu_nanosleep() left in the local timespec.
+ *
+ * Returns the result of do_cpu_nanosleep(), or -EFAULT if the remaining
+ * time cannot be copied to user space.
+ */
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
+{
+       clockid_t which_clock = restart_block->nanosleep.clockid;
+       struct timespec __user *rmtp;
+       struct itimerspec it;
+       struct timespec t;
+       int error;
+
+       t = ns_to_timespec(restart_block->nanosleep.expires);
+
+       error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
+       if (error != -ERESTART_RESTARTBLOCK)
+               return error;
+
+       /*
+        * Report back to the user the time still remaining.
+        */
+       rmtp = restart_block->nanosleep.rmtp;
+       if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+               return -EFAULT;
+
+       restart_block->nanosleep.expires = timespec_to_ns(&t);
+       return error;
+}
+
+/* Clock ids built with PID 0 on the CPUCLOCK_SCHED clock. */
+#define PROCESS_CLOCK  MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
+#define THREAD_CLOCK   MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
+
+/*
+ * Wrappers that substitute PROCESS_CLOCK for the clock id; the
+ * which_clock argument they receive is ignored.
+ */
+static int process_cpu_clock_getres(const clockid_t which_clock,
+                                   struct timespec *tp)
+{
+       return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
+}
+static int process_cpu_clock_get(const clockid_t which_clock,
+                                struct timespec *tp)
+{
+       return posix_cpu_clock_get(PROCESS_CLOCK, tp);
+}
+/* Force the timer onto PROCESS_CLOCK before the generic CPU timer setup. */
+static int process_cpu_timer_create(struct k_itimer *timer)
+{
+       timer->it_clock = PROCESS_CLOCK;
+       return posix_cpu_timer_create(timer);
+}
+static int process_cpu_nsleep(const clockid_t which_clock, int flags,
+                             struct timespec *rqtp,
+                             struct timespec __user *rmtp)
+{
+       return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
+}
+/* Always fails with -EINVAL. */
+static long process_cpu_nsleep_restart(struct restart_block *restart_block)
+{
+       return -EINVAL;
+}
+/*
+ * Wrappers that substitute THREAD_CLOCK for the clock id; the
+ * which_clock argument they receive is ignored.
+ */
+static int thread_cpu_clock_getres(const clockid_t which_clock,
+                                  struct timespec *tp)
+{
+       return posix_cpu_clock_getres(THREAD_CLOCK, tp);
+}
+static int thread_cpu_clock_get(const clockid_t which_clock,
+                               struct timespec *tp)
+{
+       return posix_cpu_clock_get(THREAD_CLOCK, tp);
+}
+/* Force the timer onto THREAD_CLOCK before the generic CPU timer setup. */
+static int thread_cpu_timer_create(struct k_itimer *timer)
+{
+       timer->it_clock = THREAD_CLOCK;
+       return posix_cpu_timer_create(timer);
+}
+
+/*
+ * Operations table wiring the generic posix_cpu_* handlers into a k_clock.
+ * Non-static, so it is visible to other translation units.
+ */
+struct k_clock clock_posix_cpu = {
+       .clock_getres   = posix_cpu_clock_getres,
+       .clock_set      = posix_cpu_clock_set,
+       .clock_get      = posix_cpu_clock_get,
+       .timer_create   = posix_cpu_timer_create,
+       .nsleep         = posix_cpu_nsleep,
+       .nsleep_restart = posix_cpu_nsleep_restart,
+       .timer_set      = posix_cpu_timer_set,
+       .timer_del      = posix_cpu_timer_del,
+       .timer_get      = posix_cpu_timer_get,
+};
+
+/*
+ * Boot-time setup: register the k_clock tables for
+ * CLOCK_PROCESS_CPUTIME_ID and CLOCK_THREAD_CPUTIME_ID, and initialise
+ * onecputick to the nanosecond part of one cputime jiffy.
+ *
+ * The thread clock table provides no nsleep handlers.
+ *
+ * NOTE(review): "process" and "thread" are stack locals; this presumably
+ * relies on posix_timers_register_clock() copying them - confirm.
+ *
+ * Always returns 0.
+ */
+static __init int init_posix_cpu_timers(void)
+{
+       struct k_clock process = {
+               .clock_getres   = process_cpu_clock_getres,
+               .clock_get      = process_cpu_clock_get,
+               .timer_create   = process_cpu_timer_create,
+               .nsleep         = process_cpu_nsleep,
+               .nsleep_restart = process_cpu_nsleep_restart,
+       };
+       struct k_clock thread = {
+               .clock_getres   = thread_cpu_clock_getres,
+               .clock_get      = thread_cpu_clock_get,
+               .timer_create   = thread_cpu_timer_create,
+       };
+       struct timespec ts;
+
+       posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
+       posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
+
+       cputime_to_timespec(cputime_one_jiffy, &ts);
+       onecputick = ts.tv_nsec;
+       WARN_ON(ts.tv_sec != 0);        /* one jiffy must be below one second */
+
+       return 0;
+}
+__initcall(init_posix_cpu_timers);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
new file mode 100644 (file)
index 0000000..424c2d4
--- /dev/null
@@ -0,0 +1,1121 @@
+/*
+ * linux/kernel/time/posix-timers.c
+ *
+ *
+ * 2002-10-15  Posix Clocks & timers
+ *                           by George Anzinger george@mvista.com
+ *
+ *                          Copyright (C) 2002 2003 by MontaVista Software.
+ *
+ * 2004-06-01  Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
+ *                          Copyright (C) 2004 Boris Hu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * MontaVista Software | 1237 East Arques Avenue | Sunnyvale | CA 94085 | USA
+ */
+
+/* These are all the functions necessary to implement
+ * POSIX clocks & timers
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/mutex.h>
+
+#include <asm/uaccess.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/hash.h>
+#include <linux/posix-clock.h>
+#include <linux/posix-timers.h>
+#include <linux/syscalls.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <linux/export.h>
+#include <linux/hashtable.h>
+
+/*
+ * Management arrays for POSIX timers. Timers are now kept in static hash table
+ * with 512 entries.
+ * Timer ids are allocated by local routine, which selects proper hash head by
+ * key, constructed from current->signal address and per signal struct counter.
+ * This keeps timer ids unique per process, but now they can intersect between
+ * processes.
+ */
+
+/*
+ * Lets keep our timers in a slab cache :-)
+ */
+static struct kmem_cache *posix_timers_cache;
+
+/*
+ * Hash table holding every k_itimer: 2^9 = 512 buckets, indexed by hash().
+ * hash_lock serialises insertion (posix_timer_add) and removal
+ * (release_posix_timer); lookups walk a bucket with the _rcu iterator.
+ */
+static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
+static DEFINE_SPINLOCK(hash_lock);
+
+/*
+ * we assume that the new SIGEV_THREAD_ID shares no bits with the other
+ * SIGEV values.  Here we put out an error if this assumption fails.
+ */
+#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
+                       ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
+#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
+#endif
+
+/*
+ * parisc wants ENOTSUP instead of EOPNOTSUPP
+ */
+#ifndef ENOTSUP
+# define ENANOSLEEP_NOTSUP EOPNOTSUPP
+#else
+# define ENANOSLEEP_NOTSUP ENOTSUP
+#endif
+
+/*
+ * The timer ID is turned into a timer address by posix_timer_by_id().
+ * Verifying a valid ID consists of:
+ *
+ * a) checking that the hash table lookup returns a timer (non-NULL).
+ * b) checking that the timer id matches the one in the timer itself.
+ * c) that the timer belongs to the caller's signal struct (thread group).
+ */
+
+/*
+ * CLOCKs: The POSIX standard calls for a couple of clocks and allows us
+ *         to implement others.  This structure defines the various
+ *         clocks.
+ *
+ * RESOLUTION: Clock resolution is used to round up timer and interval
+ *         times, NOT to report clock times, which are reported with as
+ *         much resolution as the system can muster.  In some cases this
+ *         resolution may depend on the underlying clock hardware and
+ *         may not be quantifiable until run time, and only then is the
+ *         necessary code is written.  The standard says we should say
+ *         something about this issue in the documentation...
+ *
+ * FUNCTIONS: The CLOCKs structure defines possible functions to
+ *         handle various clock functions.
+ *
+ *         The standard POSIX timer management code assumes the
+ *         following: 1.) The k_itimer struct (sched.h) is used for
+ *         the timer.  2.) The list, it_lock, it_clock, it_id and
+ *         it_pid fields are not modified by timer code.
+ *
+ * Permissions: It is assumed that the clock_settime() function defined
+ *         for each clock will take care of permission checks.  Some
+ *         clocks may be set able by any user (i.e. local process
+ *         clocks) others not.  Currently the only set able clock we
+ *         have is CLOCK_REALTIME and its high res counter part, both of
+ *         which we beg off on and pass to do_sys_settimeofday().
+ */
+
+static struct k_clock posix_clocks[MAX_CLOCKS];
+
+/*
+ * These ones are defined below.
+ */
+static int common_nsleep(const clockid_t, int flags, struct timespec *t,
+                        struct timespec __user *rmtp);
+static int common_timer_create(struct k_itimer *new_timer);
+static void common_timer_get(struct k_itimer *, struct itimerspec *);
+static int common_timer_set(struct k_itimer *, int,
+                           struct itimerspec *, struct itimerspec *);
+static int common_timer_del(struct k_itimer *timer);
+
+static enum hrtimer_restart posix_timer_fn(struct hrtimer *data);
+
+static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
+
+/*
+ * lock_timer() - look up timer @tid and return it with it_lock held.
+ *
+ * Evaluates to the k_itimer pointer returned by __lock_timer(), which is
+ * NULL when the lookup fails; @flags receives the saved interrupt state.
+ * NOTE(review): __cond_lock() presumably only annotates the conditional
+ * lock acquisition for static checking - confirm.
+ */
+#define lock_timer(tid, flags)                                            \
+({     struct k_itimer *__timr;                                           \
+       __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags));  \
+       __timr;                                                            \
+})
+
+/*
+ * Map a (signal struct, timer id) pair onto a bucket index of
+ * posix_timers_hashtable.
+ */
+static int hash(struct signal_struct *sig, unsigned int nr)
+{
+       unsigned int key = hash32_ptr(sig) ^ nr;
+
+       return hash_32(key, HASH_BITS(posix_timers_hashtable));
+}
+
+/*
+ * Search one hash bucket for the timer with id @id that belongs to @sig.
+ * Returns the timer, or NULL if the bucket holds no such entry.
+ */
+static struct k_itimer *__posix_timers_find(struct hlist_head *head,
+                                           struct signal_struct *sig,
+                                           timer_t id)
+{
+       struct k_itimer *pos;
+
+       hlist_for_each_entry_rcu(pos, head, t_hash) {
+               if (pos->it_signal != sig)
+                       continue;
+               if (pos->it_id == id)
+                       return pos;
+       }
+
+       return NULL;
+}
+
+/*
+ * Find the timer with id @id owned by the calling task's signal struct.
+ * Returns NULL when there is none.
+ */
+static struct k_itimer *posix_timer_by_id(timer_t id)
+{
+       struct signal_struct *owner = current->signal;
+       struct hlist_head *bucket;
+
+       bucket = &posix_timers_hashtable[hash(owner, id)];
+       return __posix_timers_find(bucket, owner, id);
+}
+
+/*
+ * Insert @timer into the hash table under a fresh timer id.
+ *
+ * Starting at current->signal->posix_timer_id, one candidate id is probed
+ * per loop iteration; the first id not already found for this signal
+ * struct is taken.  The per-signal counter is advanced on every probe and
+ * wraps to 0 instead of going negative.
+ *
+ * Returns the new id (>= 0), or -EAGAIN once the counter has come back
+ * round to the value it had on entry without a free id being found.
+ */
+static int posix_timer_add(struct k_itimer *timer)
+{
+       struct signal_struct *sig = current->signal;
+       int first_free_id = sig->posix_timer_id;
+       struct hlist_head *head;
+       int ret = -ENOENT;
+
+       do {
+               /* hash_lock is dropped and retaken for every probed id */
+               spin_lock(&hash_lock);
+               head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
+               if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
+                       hlist_add_head_rcu(&timer->t_hash, head);
+                       ret = sig->posix_timer_id;
+               }
+               /* always move on, so the next create starts past this id */
+               if (++sig->posix_timer_id < 0)
+                       sig->posix_timer_id = 0;
+               if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
+                       /* Loop over all possible ids completed */
+                       ret = -EAGAIN;
+               spin_unlock(&hash_lock);
+       } while (ret == -ENOENT);
+       return ret;
+}
+
+/*
+ * Release @timr->it_lock and restore the interrupt state in @flags, as
+ * saved when the lock was taken with spin_lock_irqsave().
+ */
+static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
+{
+       spin_unlock_irqrestore(&timr->it_lock, flags);
+}
+
+/*
+ * clock_get for CLOCK_REALTIME: fills @tp via ktime_get_real_ts().
+ * @which_clock is unused.  Always returns 0.
+ */
+static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp)
+{
+       ktime_get_real_ts(tp);
+       return 0;
+}
+
+/*
+ * clock_set for CLOCK_REALTIME: passes @tp to do_sys_settimeofday() with
+ * a NULL second argument and returns its result.  @which_clock is unused.
+ */
+static int posix_clock_realtime_set(const clockid_t which_clock,
+                                   const struct timespec *tp)
+{
+       return do_sys_settimeofday(tp, NULL);
+}
+
+/*
+ * clock_adj for CLOCK_REALTIME: returns the result of do_adjtimex(@t).
+ * @which_clock is unused.
+ */
+static int posix_clock_realtime_adj(const clockid_t which_clock,
+                                   struct timex *t)
+{
+       return do_adjtimex(t);
+}
+
+/*
+ * clock_get for CLOCK_MONOTONIC: fills @tp via ktime_get_ts().
+ * @which_clock is unused.  Always returns 0.
+ */
+static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
+{
+       ktime_get_ts(tp);
+       return 0;
+}
+
+/*
+ * clock_get for CLOCK_MONOTONIC_RAW: fills @tp via getrawmonotonic().
+ * @which_clock is unused.  Always returns 0.
+ */
+static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
+{
+       getrawmonotonic(tp);
+       return 0;
+}
+
+
+/*
+ * clock_get for CLOCK_REALTIME_COARSE: stores the value returned by
+ * current_kernel_time() in @tp.  Always returns 0.
+ */
+static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp)
+{
+       *tp = current_kernel_time();
+       return 0;
+}
+
+/*
+ * clock_get for CLOCK_MONOTONIC_COARSE: stores the value returned by
+ * get_monotonic_coarse() in @tp.  Always returns 0.
+ */
+static int posix_get_monotonic_coarse(clockid_t which_clock,
+                                               struct timespec *tp)
+{
+       *tp = get_monotonic_coarse();
+       return 0;
+}
+
+/*
+ * clock_getres shared by the two *_COARSE clocks: reports KTIME_LOW_RES
+ * converted to a timespec.  Always returns 0.
+ */
+static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
+{
+       *tp = ktime_to_timespec(KTIME_LOW_RES);
+       return 0;
+}
+
+/*
+ * clock_get for CLOCK_BOOTTIME: fills @tp via get_monotonic_boottime().
+ * @which_clock is unused.  Always returns 0.
+ */
+static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
+{
+       get_monotonic_boottime(tp);
+       return 0;
+}
+
+/*
+ * clock_get for CLOCK_TAI: fills @tp via timekeeping_clocktai().
+ * @which_clock is unused.  Always returns 0.
+ */
+static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
+{
+       timekeeping_clocktai(tp);
+       return 0;
+}
+
+/*
+ * Boot-time initialisation of the POSIX clocks and timers.
+ *
+ * Builds the k_clock operation tables for the static clock ids, registers
+ * them and creates the slab cache that k_itimers are allocated from.
+ * CLOCK_MONOTONIC_RAW and the two *_COARSE clocks only provide
+ * clock_getres/clock_get, i.e. no nsleep and no timer operations.
+ * Always returns 0.
+ */
+static __init int init_posix_timers(void)
+{
+       struct k_clock clock_realtime = {
+               .clock_getres   = hrtimer_get_res,
+               .clock_get      = posix_clock_realtime_get,
+               .clock_set      = posix_clock_realtime_set,
+               .clock_adj      = posix_clock_realtime_adj,
+               .nsleep         = common_nsleep,
+               .nsleep_restart = hrtimer_nanosleep_restart,
+               .timer_create   = common_timer_create,
+               .timer_set      = common_timer_set,
+               .timer_get      = common_timer_get,
+               .timer_del      = common_timer_del,
+       };
+       struct k_clock clock_monotonic = {
+               .clock_getres   = hrtimer_get_res,
+               .clock_get      = posix_ktime_get_ts,
+               .nsleep         = common_nsleep,
+               .nsleep_restart = hrtimer_nanosleep_restart,
+               .timer_create   = common_timer_create,
+               .timer_set      = common_timer_set,
+               .timer_get      = common_timer_get,
+               .timer_del      = common_timer_del,
+       };
+       struct k_clock clock_monotonic_raw = {
+               .clock_getres   = hrtimer_get_res,
+               .clock_get      = posix_get_monotonic_raw,
+       };
+       struct k_clock clock_realtime_coarse = {
+               .clock_getres   = posix_get_coarse_res,
+               .clock_get      = posix_get_realtime_coarse,
+       };
+       struct k_clock clock_monotonic_coarse = {
+               .clock_getres   = posix_get_coarse_res,
+               .clock_get      = posix_get_monotonic_coarse,
+       };
+       struct k_clock clock_tai = {
+               .clock_getres   = hrtimer_get_res,
+               .clock_get      = posix_get_tai,
+               .nsleep         = common_nsleep,
+               .nsleep_restart = hrtimer_nanosleep_restart,
+               .timer_create   = common_timer_create,
+               .timer_set      = common_timer_set,
+               .timer_get      = common_timer_get,
+               .timer_del      = common_timer_del,
+       };
+       struct k_clock clock_boottime = {
+               .clock_getres   = hrtimer_get_res,
+               .clock_get      = posix_get_boottime,
+               .nsleep         = common_nsleep,
+               .nsleep_restart = hrtimer_nanosleep_restart,
+               .timer_create   = common_timer_create,
+               .timer_set      = common_timer_set,
+               .timer_get      = common_timer_get,
+               .timer_del      = common_timer_del,
+       };
+
+       /*
+        * The tables above live on this function's stack; that is fine
+        * because posix_timers_register_clock() copies them by value.
+        */
+       posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
+       posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
+       posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
+       posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
+       posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
+       posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
+       posix_timers_register_clock(CLOCK_TAI, &clock_tai);
+
+       /*
+        * The result is not checked here.
+        * NOTE(review): presumably SLAB_PANIC makes a failed creation
+        * panic instead of returning NULL - confirm.
+        */
+       posix_timers_cache = kmem_cache_create("posix_timers_cache",
+                                       sizeof (struct k_itimer), 0, SLAB_PANIC,
+                                       NULL);
+       return 0;
+}
+
+__initcall(init_posix_timers);
+
+/*
+ * Re-arm an hrtimer based interval timer.  Called from
+ * do_schedule_next_timer() with the timer locked.
+ *
+ * Does nothing for a timer without an interval.  Otherwise the expiry is
+ * moved forward past the current time and the number of skipped
+ * intervals is added to it_overrun.  The total is then frozen in
+ * it_overrun_last, it_overrun is reset to -1, it_requeue_pending is
+ * incremented and the hrtimer is restarted.
+ */
+static void schedule_next_timer(struct k_itimer *timr)
+{
+       struct hrtimer *timer = &timr->it.real.timer;
+
+       if (timr->it.real.interval.tv64 == 0)
+               return;
+
+       timr->it_overrun += (unsigned int) hrtimer_forward(timer,
+                                               timer->base->get_time(),
+                                               timr->it.real.interval);
+
+       timr->it_overrun_last = timr->it_overrun;
+       timr->it_overrun = -1;
+       ++timr->it_requeue_pending;
+       hrtimer_restart(timer);
+}
+
+/*
+ * This function is exported for use by the signal delivery code.  It is
+ * called just prior to the info block being released and is passed that
+ * block.  Its job is to update the overrun entry AND to restart the
+ * timer.  It should only be called if the timer is to be restarted
+ * (i.e. this was flagged in the si_sys_private entry of the info block).
+ *
+ * To protect against the timer going away while the signal is queued,
+ * the timer is only touched when its it_requeue_pending still matches
+ * the value recorded in info->si_sys_private.
+ */
+void do_schedule_next_timer(struct siginfo *info)
+{
+       struct k_itimer *timr;
+       unsigned long flags;
+
+       timr = lock_timer(info->si_tid, &flags);
+       if (!timr)
+               return;
+
+       /* Stale signal: the timer was re-armed or changed in the meantime */
+       if (timr->it_requeue_pending != info->si_sys_private)
+               goto out_unlock;
+
+       /* Negative clock ids are handled by the CPU timer code */
+       if (timr->it_clock < 0)
+               posix_cpu_timer_schedule(timr);
+       else
+               schedule_next_timer(timr);
+
+       info->si_overrun += timr->it_overrun_last;
+
+out_unlock:
+       unlock_timer(timr, flags);
+}
+
+/*
+ * Queue the timer's preallocated signal to the task identified by it_pid.
+ *
+ * @timr:       timer that fired
+ * @si_private: value stored in si_sys_private of the queued siginfo
+ *
+ * send_sigqueue() is asked for a shared signal unless SIGEV_THREAD_ID is
+ * set in it_sigev_notify.  Returns nonzero only when send_sigqueue()
+ * returned a positive value; when the target task is no longer found the
+ * result is 0.
+ * NOTE(review): per the comment in posix_timer_fn(), a nonzero result
+ * means the signal was not sent because it is ignored - confirm against
+ * send_sigqueue().
+ */
+int posix_timer_event(struct k_itimer *timr, int si_private)
+{
+       struct task_struct *task;
+       int shared, ret = -1;
+       /*
+        * FIXME: if ->sigq is queued we can race with
+        * dequeue_signal()->do_schedule_next_timer().
+        *
+        * If dequeue_signal() sees the "right" value of
+        * si_sys_private it calls do_schedule_next_timer().
+        * We re-queue ->sigq and drop ->it_lock().
+        * do_schedule_next_timer() locks the timer
+        * and re-schedules it while ->sigq is pending.
+        * Not really bad, but not what we want.
+        */
+       timr->sigq->info.si_sys_private = si_private;
+
+       /* RCU keeps the task returned by pid_task() valid while we use it */
+       rcu_read_lock();
+       task = pid_task(timr->it_pid, PIDTYPE_PID);
+       if (task) {
+               shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
+               ret = send_sigqueue(timr->sigq, task, shared);
+       }
+       rcu_read_unlock();
+       /* If we failed to send the signal the timer stops. */
+       return ret > 0;
+}
+EXPORT_SYMBOL_GPL(posix_timer_event);
+
+/*
+ * This function gets called when a POSIX.1b interval timer expires.  It
+ * is used as a callback from the kernel internal timer.  The
+ * run_timer_list code ALWAYS calls with interrupts on.
+ *
+ * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
+ *
+ * Returns HRTIMER_RESTART only when posix_timer_event() reported that the
+ * signal was not sent and the timer has an interval; otherwise
+ * HRTIMER_NORESTART.
+ */
+static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
+{
+       struct k_itimer *timr;
+       unsigned long flags;
+       int si_private = 0;
+       enum hrtimer_restart ret = HRTIMER_NORESTART;
+
+       timr = container_of(timer, struct k_itimer, it.real.timer);
+       spin_lock_irqsave(&timr->it_lock, flags);
+
+       /*
+        * Interval timers tag the signal with the new requeue count;
+        * do_schedule_next_timer() compares it before re-arming.
+        */
+       if (timr->it.real.interval.tv64 != 0)
+               si_private = ++timr->it_requeue_pending;
+
+       if (posix_timer_event(timr, si_private)) {
+               /*
+                * The signal was not sent because it is ignored
+                * (SIG_IGN), so we will not get a call back to restart
+                * the timer AND it should be restarted.
+                */
+               if (timr->it.real.interval.tv64 != 0) {
+                       ktime_t now = hrtimer_cb_get_time(timer);
+
+                       /*
+                        * FIXME: What we really want, is to stop this
+                        * timer completely and restart it in case the
+                        * SIG_IGN is removed. This is a non trivial
+                        * change which involves sighand locking
+                        * (sigh !), which we don't want to do late in
+                        * the release cycle.
+                        *
+                        * For now we just let timers with an interval
+                        * less than a jiffie expire every jiffie to
+                        * avoid softirq starvation in case of SIG_IGN
+                        * and a very small interval, which would put
+                        * the timer right back on the softirq pending
+                        * list. By moving now ahead of time we trick
+                        * hrtimer_forward() to expire the timer
+                        * later, while we still maintain the overrun
+                        * accuracy, but have some inconsistency in
+                        * the timer_gettime() case. This is at least
+                        * better than a starved softirq. A more
+                        * complex fix which solves also another related
+                        * inconsistency is already in the pipeline.
+                        */
+#ifdef CONFIG_HIGH_RES_TIMERS
+                       {
+                               ktime_t kj = ktime_set(0, NSEC_PER_SEC / HZ);
+
+                               if (timr->it.real.interval.tv64 < kj.tv64)
+                                       now = ktime_add(now, kj);
+                       }
+#endif
+                       timr->it_overrun += (unsigned int)
+                               hrtimer_forward(timer, now,
+                                               timr->it.real.interval);
+                       ret = HRTIMER_RESTART;
+                       ++timr->it_requeue_pending;
+               }
+       }
+
+       unlock_timer(timr, flags);
+       return ret;
+}
+
+/*
+ * Validate a user supplied sigevent and pick the pid that will receive
+ * the timer signal.
+ *
+ * Only the defined notification modes are accepted:
+ *
+ *   SIGEV_SIGNAL | SIGEV_THREAD_ID  target is sigev_notify_thread_id,
+ *                                   which must be in the caller's
+ *                                   thread group
+ *   SIGEV_SIGNAL, SIGEV_THREAD      target is the caller's group leader
+ *   SIGEV_NONE                      as above; sigev_signo is not checked
+ *
+ * Every mode except SIGEV_NONE requires 0 < sigev_signo <= SIGRTMAX.
+ * Any other sigev_notify value is rejected rather than being stored
+ * unchecked in it_sigev_notify, where later code masks and compares it
+ * against the defined modes.
+ *
+ * The caller, timer_create(), holds rcu_read_lock() across this call.
+ *
+ * Returns the target's struct pid without taking a reference, or NULL
+ * when the sigevent is invalid.
+ */
+static struct pid *good_sigevent(sigevent_t * event)
+{
+       struct task_struct *rtn = current->group_leader;
+
+       switch (event->sigev_notify) {
+       case SIGEV_SIGNAL | SIGEV_THREAD_ID:
+               rtn = find_task_by_vpid(event->sigev_notify_thread_id);
+               if (!rtn || !same_thread_group(rtn, current))
+                       return NULL;
+               /* FALLTHRU */
+       case SIGEV_SIGNAL:
+       case SIGEV_THREAD:
+               if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
+                       return NULL;
+               /* FALLTHRU */
+       case SIGEV_NONE:
+               return task_pid(rtn);
+       default:
+               return NULL;
+       }
+}
+
+/*
+ * Install @new_clock as the operations for the static clock @clock_id.
+ *
+ * The structure is copied into posix_clocks[], so the caller's copy does
+ * not have to outlive the call.  Registration is refused, with a warning
+ * printed, when @clock_id is outside [0, MAX_CLOCKS) or when one of the
+ * mandatory callbacks clock_get / clock_getres is missing.
+ */
+void posix_timers_register_clock(const clockid_t clock_id,
+                                struct k_clock *new_clock)
+{
+       if ((unsigned) clock_id >= MAX_CLOCKS) {
+               printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
+                      clock_id);
+       } else if (!new_clock->clock_get) {
+               printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
+                      clock_id);
+       } else if (!new_clock->clock_getres) {
+               printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
+                      clock_id);
+       } else {
+               posix_clocks[clock_id] = *new_clock;
+       }
+}
+EXPORT_SYMBOL_GPL(posix_timers_register_clock);
+
+/*
+ * Allocate a zeroed k_itimer together with its signal queue entry.
+ * The siginfo inside the queue entry is cleared as well.  Returns NULL
+ * if either allocation fails; nothing is left allocated in that case.
+ */
+static struct k_itimer * alloc_posix_timer(void)
+{
+       struct k_itimer *timer;
+
+       timer = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
+       if (!timer)
+               return NULL;
+
+       timer->sigq = sigqueue_alloc();
+       if (unlikely(!timer->sigq)) {
+               kmem_cache_free(posix_timers_cache, timer);
+               return NULL;
+       }
+
+       memset(&timer->sigq->info, 0, sizeof(siginfo_t));
+       return timer;
+}
+
+/*
+ * RCU callback queued by release_posix_timer(): returns the k_itimer that
+ * embeds @head (as it.rcu) to posix_timers_cache.
+ */
+static void k_itimer_rcu_free(struct rcu_head *head)
+{
+       struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
+
+       kmem_cache_free(posix_timers_cache, tmr);
+}
+
+/*
+ * Values for the it_id_set argument of release_posix_timer():
+ * IT_ID_SET     - the timer was added to the hash table and must be
+ *                 unhashed before it is freed
+ * IT_ID_NOT_SET - the timer never made it into the hash table
+ */
+#define IT_ID_SET      1
+#define IT_ID_NOT_SET  0
+/*
+ * Tear down @tmr: unhash it if requested, drop the pid reference, free
+ * the signal queue entry and hand the k_itimer itself to call_rcu(), so
+ * the memory is only returned to the slab cache from the RCU callback.
+ */
+static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
+{
+       if (it_id_set) {
+               unsigned long flags;
+               spin_lock_irqsave(&hash_lock, flags);
+               hlist_del_rcu(&tmr->t_hash);
+               spin_unlock_irqrestore(&hash_lock, flags);
+       }
+       put_pid(tmr->it_pid);
+       sigqueue_free(tmr->sigq);
+       call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+}
+
+/*
+ * Translate a clock id into the k_clock operations that implement it.
+ *
+ * Negative ids select clock_posix_dynamic when the CLOCKFD bits match and
+ * clock_posix_cpu otherwise.  Non-negative ids index posix_clocks[]; an
+ * id beyond the table or a slot without clock_getres (i.e. one that was
+ * never registered) yields NULL.
+ */
+static struct k_clock *clockid_to_kclock(const clockid_t id)
+{
+       if (id < 0) {
+               if ((id & CLOCKFD_MASK) == CLOCKFD)
+                       return &clock_posix_dynamic;
+               return &clock_posix_cpu;
+       }
+
+       if (id >= MAX_CLOCKS)
+               return NULL;
+       if (!posix_clocks[id].clock_getres)
+               return NULL;
+
+       return &posix_clocks[id];
+}
+
+/*
+ * timer_create handler of the hrtimer based clocks: initialises the
+ * embedded hrtimer on the timer's clock with mode 0.  The hrtimer is
+ * initialised again, with the real mode, in common_timer_set().
+ * NOTE(review): mode 0 presumably is HRTIMER_MODE_ABS - confirm.
+ * Always returns 0.
+ */
+static int common_timer_create(struct k_itimer *new_timer)
+{
+       hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
+       return 0;
+}
+
+/*
+ * Create a POSIX.1b interval timer.
+ *
+ * @which_clock:       clock the timer is driven by
+ * @timer_event_spec:  user sigevent describing the notification, or NULL
+ *                     for the default: SIGEV_SIGNAL with SIGALRM sent to
+ *                     the caller's thread group and the timer id as
+ *                     sigev_value
+ * @created_timer_id:  user location that receives the new timer id
+ *
+ * Returns 0 on success, -EINVAL for an unknown clock or a rejected
+ * sigevent, -EOPNOTSUPP if the clock cannot create timers, -EAGAIN if
+ * the timer could not be allocated or no id is free, -EFAULT on a failed
+ * user copy, or the error returned by the clock's timer_create().
+ */
+
+SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
+               struct sigevent __user *, timer_event_spec,
+               timer_t __user *, created_timer_id)
+{
+       struct k_clock *kc = clockid_to_kclock(which_clock);
+       struct k_itimer *new_timer;
+       int error, new_timer_id;
+       sigevent_t event;
+       int it_id_set = IT_ID_NOT_SET;
+
+       if (!kc)
+               return -EINVAL;
+       if (!kc->timer_create)
+               return -EOPNOTSUPP;
+
+       new_timer = alloc_posix_timer();
+       if (unlikely(!new_timer))
+               return -EAGAIN;
+
+       spin_lock_init(&new_timer->it_lock);
+       new_timer_id = posix_timer_add(new_timer);
+       if (new_timer_id < 0) {
+               error = new_timer_id;
+               goto out;
+       }
+
+       /* From here on the error path has to unhash the timer again */
+       it_id_set = IT_ID_SET;
+       new_timer->it_id = (timer_t) new_timer_id;
+       new_timer->it_clock = which_clock;
+       new_timer->it_overrun = -1;
+
+       if (timer_event_spec) {
+               if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
+                       error = -EFAULT;
+                       goto out;
+               }
+               rcu_read_lock();
+               new_timer->it_pid = get_pid(good_sigevent(&event));
+               rcu_read_unlock();
+               if (!new_timer->it_pid) {
+                       error = -EINVAL;
+                       goto out;
+               }
+       } else {
+               /*
+                * event lives on the stack and only the sival_int member
+                * of the sigev_value union is assigned below.  Clear the
+                * union first so that the bytes sival_int does not cover
+                * are not copied uninitialised into si_value, which is
+                * handed out with the timer signal.
+                */
+               memset(&event.sigev_value, 0, sizeof(event.sigev_value));
+               event.sigev_notify = SIGEV_SIGNAL;
+               event.sigev_signo = SIGALRM;
+               event.sigev_value.sival_int = new_timer->it_id;
+               new_timer->it_pid = get_pid(task_tgid(current));
+       }
+
+       new_timer->it_sigev_notify     = event.sigev_notify;
+       new_timer->sigq->info.si_signo = event.sigev_signo;
+       new_timer->sigq->info.si_value = event.sigev_value;
+       new_timer->sigq->info.si_tid   = new_timer->it_id;
+       new_timer->sigq->info.si_code  = SI_TIMER;
+
+       if (copy_to_user(created_timer_id,
+                        &new_timer_id, sizeof (new_timer_id))) {
+               error = -EFAULT;
+               goto out;
+       }
+
+       error = kc->timer_create(new_timer);
+       if (error)
+               goto out;
+
+       /*
+        * Setting it_signal is what makes the timer pass the ownership
+        * check in __lock_timer().
+        */
+       spin_lock_irq(&current->sighand->siglock);
+       new_timer->it_signal = current->signal;
+       list_add(&new_timer->list, &current->signal->posix_timers);
+       spin_unlock_irq(&current->sighand->siglock);
+
+       /*
+        * In the case of the timer belonging to another task, after
+        * the task is unlocked, the timer is owned by the other task
+        * and may cease to exist at any time.  Don't use or modify
+        * new_timer after the unlock call.
+        */
+       return 0;
+out:
+       release_posix_timer(new_timer, it_id_set);
+       return error;
+}
+
+/*
+ * Look up @timer_id for the calling process and return the timer with
+ * it_lock held and interrupts disabled (previous state in *@flags), or
+ * NULL if there is no such timer.
+ *
+ * Locking issues: We need to protect the result of the id look up until
+ * we get the timer locked down so it is not deleted under us.  The
+ * lookup runs under rcu_read_lock() and timers are freed through
+ * call_rcu(), which bridges the find to the timer lock.  Once it_lock is
+ * held, it_signal is checked again: timer_delete() clears it under the
+ * same lock, so a timer that is being deleted is not returned.
+ */
+static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
+{
+       struct k_itimer *timr;
+
+       /*
+        * timer_t could be any type >= int and we want to make sure any
+        * @timer_id outside positive int range fails lookup.
+        */
+       if ((unsigned long long)timer_id > INT_MAX)
+               return NULL;
+
+       rcu_read_lock();
+       timr = posix_timer_by_id(timer_id);
+       if (timr) {
+               spin_lock_irqsave(&timr->it_lock, *flags);
+               /* still owned by us, i.e. not deleted while we waited? */
+               if (timr->it_signal == current->signal) {
+                       rcu_read_unlock();
+                       return timr;
+               }
+               spin_unlock_irqrestore(&timr->it_lock, *flags);
+       }
+       rcu_read_unlock();
+
+       return NULL;
+}
+
+/*
+ * Get the time remaining on a POSIX.1b interval timer.  This function
+ * is ALWAYS called with spin_lock_irq on the timer, thus it must not
+ * mess with irq.
+ *
+ * @timr:        the locked timer
+ * @cur_setting: result; zeroed first, then it_interval and it_value are
+ *               filled in as far as they apply
+ *
+ * We have a couple of messes to clean up here.  First there is the case
+ * of a timer that has a requeue pending.  These timers should appear to
+ * be in the timer list with an expiry as if we were to requeue them
+ * now.
+ *
+ * The second issue is the SIGEV_NONE timer which may be active but is
+ * not really ever put in the timer list (to save system resources).
+ * This timer may be expired, and if so, we will do it here.  Otherwise
+ * it is the same as a requeue pending timer WRT to what we should
+ * report.
+ */
+static void
+common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
+{
+       ktime_t now, remaining, iv;
+       struct hrtimer *timer = &timr->it.real.timer;
+
+       memset(cur_setting, 0, sizeof(struct itimerspec));
+
+       iv = timr->it.real.interval;
+
+       /* interval timer ? */
+       if (iv.tv64)
+               cur_setting->it_interval = ktime_to_timespec(iv);
+       /* an inactive one-shot timer that is not SIGEV_NONE reports zero */
+       else if (!hrtimer_active(timer) &&
+                (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
+               return;
+
+       now = timer->base->get_time();
+
+       /*
+        * When a requeue is pending or this is a SIGEV_NONE
+        * timer move the expiry time forward by intervals, so
+        * expiry is > now.
+        */
+       if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
+           (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
+               timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
+
+       remaining = ktime_sub(hrtimer_get_expires(timer), now);
+       /* Return 0 only, when the timer is expired and not pending */
+       if (remaining.tv64 <= 0) {
+               /*
+                * A single shot SIGEV_NONE timer must return 0, when
+                * it is expired !  Every other timer reports 1ns here.
+                */
+               if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
+                       cur_setting->it_value.tv_nsec = 1;
+       } else
+               cur_setting->it_value = ktime_to_timespec(remaining);
+}
+
+/*
+ * Get the time remaining on a POSIX.1b interval timer.
+ *
+ * The value is read through the clock's timer_get() with the timer
+ * locked and copied to @setting after the lock has been dropped.
+ * Returns 0 on success, -EINVAL if @timer_id is not a timer of the
+ * caller or its clock has no timer_get(), -EFAULT if the copy to user
+ * space fails.
+ */
+SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
+               struct itimerspec __user *, setting)
+{
+       struct itimerspec cur_setting;
+       unsigned long irqflags;
+       struct k_itimer *timr;
+       struct k_clock *kc;
+       int err = 0;
+
+       timr = lock_timer(timer_id, &irqflags);
+       if (!timr)
+               return -EINVAL;
+
+       kc = clockid_to_kclock(timr->it_clock);
+       if (WARN_ON_ONCE(!kc || !kc->timer_get))
+               err = -EINVAL;
+       else
+               kc->timer_get(timr, &cur_setting);
+
+       unlock_timer(timr, irqflags);
+
+       if (err)
+               return err;
+       if (copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
+               return -EFAULT;
+
+       return 0;
+}
+
+/*
+ * Get the number of overruns of a POSIX.1b interval timer.  This is the
+ * overrun count of the signal delivered last; overruns for the next
+ * expiry are accumulated separately.  The count is frozen when the
+ * signal is delivered, either at notify time (if the info block is not
+ * queued) or at the actual delivery time (as we are informed by the
+ * call back to do_schedule_next_timer()).  So all that is left to do
+ * here is to pick up the frozen value, it_overrun_last.
+ *
+ * Returns that count, or -EINVAL if @timer_id is not a timer of the
+ * caller.
+ */
+SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
+{
+       unsigned long irqflags;
+       struct k_itimer *timr;
+       int frozen;
+
+       timr = lock_timer(timer_id, &irqflags);
+       if (!timr)
+               return -EINVAL;
+
+       frozen = timr->it_overrun_last;
+       unlock_timer(timr, irqflags);
+
+       return frozen;
+}
+
+/*
+ * Set a POSIX.1b interval timer.  timr->it_lock is taken by the caller.
+ *
+ * @timr:        the locked timer
+ * @flags:       TIMER_ABSTIME selects an absolute expiry, otherwise the
+ *               expiry is relative
+ * @new_setting: new value and interval; a zero it_value just disarms
+ * @old_setting: if not NULL, receives the previous setting
+ *
+ * Returns 0 on success or TIMER_RETRY when the running hrtimer could not
+ * be cancelled; the caller then has to drop the lock and try again.
+ */
+static int
+common_timer_set(struct k_itimer *timr, int flags,
+                struct itimerspec *new_setting, struct itimerspec *old_setting)
+{
+       struct hrtimer *timer = &timr->it.real.timer;
+       enum hrtimer_mode mode;
+
+       if (old_setting)
+               common_timer_get(timr, old_setting);
+
+       /* disable the timer */
+       timr->it.real.interval.tv64 = 0;
+       /*
+        * careful here.  If smp we could be in the "fire" routine which will
+        * be spinning as we hold the lock.  But this is ONLY an SMP issue.
+        */
+       if (hrtimer_try_to_cancel(timer) < 0)
+               return TIMER_RETRY;
+
+       /*
+        * Move the requeue count on with REQUEUE_PENDING cleared, so a
+        * signal still queued for the old setting no longer matches in
+        * do_schedule_next_timer().
+        */
+       timr->it_requeue_pending = (timr->it_requeue_pending + 2) & 
+               ~REQUEUE_PENDING;
+       timr->it_overrun_last = 0;
+
+       /* switch off the timer when it_value is zero */
+       if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
+               return 0;
+
+       mode = flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
+       hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
+       timr->it.real.timer.function = posix_timer_fn;
+
+       hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value));
+
+       /* Convert interval */
+       timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
+
+       /* SIGEV_NONE timers are not queued ! See common_timer_get */
+       if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
+               /* Setup correct expiry time for relative timers */
+               if (mode == HRTIMER_MODE_REL) {
+                       hrtimer_add_expires(timer, timer->base->get_time());
+               }
+               return 0;
+       }
+
+       hrtimer_start_expires(timer, mode);
+       return 0;
+}
+
+/*
+ * Set a POSIX.1b interval timer.
+ *
+ * @timer_id:    timer to arm or disarm
+ * @flags:       passed through to the clock's timer_set()
+ * @new_setting: user pointer to the new value and interval; mandatory
+ * @old_setting: optional user pointer that receives the previous setting
+ *
+ * Returns 0 on success, -EINVAL for a NULL @new_setting, an invalid
+ * timespec, an unknown timer or a clock without timer_set(), -EFAULT if
+ * a user copy fails, or the error returned by the clock's timer_set().
+ */
+SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
+               const struct itimerspec __user *, new_setting,
+               struct itimerspec __user *, old_setting)
+{
+       struct k_itimer *timr;
+       struct itimerspec new_spec, old_spec;
+       int error = 0;
+       unsigned long flag;
+       struct itimerspec *rtn = old_setting ? &old_spec : NULL;
+       struct k_clock *kc;
+
+       if (!new_setting)
+               return -EINVAL;
+
+       if (copy_from_user(&new_spec, new_setting, sizeof (new_spec)))
+               return -EFAULT;
+
+       if (!timespec_valid(&new_spec.it_interval) ||
+           !timespec_valid(&new_spec.it_value))
+               return -EINVAL;
+retry:
+       timr = lock_timer(timer_id, &flag);
+       if (!timr)
+               return -EINVAL;
+
+       kc = clockid_to_kclock(timr->it_clock);
+       if (WARN_ON_ONCE(!kc || !kc->timer_set))
+               error = -EINVAL;
+       else
+               error = kc->timer_set(timr, flags, &new_spec, rtn);
+
+       /* the lock has to be dropped before a TIMER_RETRY is retried */
+       unlock_timer(timr, flag);
+       if (error == TIMER_RETRY) {
+               rtn = NULL;     // We already got the old time...
+               goto retry;
+       }
+
+       if (old_setting && !error &&
+           copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
+               error = -EFAULT;
+
+       return error;
+}
+
+/*
+ * Stop the hrtimer behind a posix timer.  The reload interval is cleared
+ * first, then a cancel is attempted.  A negative result from
+ * hrtimer_try_to_cancel() is reported as TIMER_RETRY so the caller can
+ * try again; anything else yields 0.
+ */
+static int common_timer_del(struct k_itimer *timer)
+{
+       int cancelled;
+
+       timer->it.real.interval.tv64 = 0;
+       cancelled = hrtimer_try_to_cancel(&timer->it.real.timer);
+
+       return cancelled < 0 ? TIMER_RETRY : 0;
+}
+
+/*
+ * Dispatch a delete request to the timer_del callback of the k_clock
+ * that belongs to the timer's clock id.  Warns once and returns -EINVAL
+ * when the clock or its callback is missing.
+ */
+static inline int timer_delete_hook(struct k_itimer *timer)
+{
+       struct k_clock *clk = clockid_to_kclock(timer->it_clock);
+
+       if (!WARN_ON_ONCE(!clk || !clk->timer_del))
+               return clk->timer_del(timer);
+
+       return -EINVAL;
+}
+
+/*
+ * Delete a POSIX.1b interval timer.
+ *
+ * Returns 0 once the timer has been torn down, or -EINVAL when
+ * lock_timer() cannot find a timer for timer_id.
+ */
+SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
+{
+       struct k_itimer *timer;
+       unsigned long flags;
+
+retry_delete:
+       timer = lock_timer(timer_id, &flags);
+       if (!timer)
+               return -EINVAL;
+
+       /* On TIMER_RETRY drop the timer lock and start over */
+       if (timer_delete_hook(timer) == TIMER_RETRY) {
+               unlock_timer(timer, flags);
+               goto retry_delete;
+       }
+
+       /* Unlink the timer from its list while holding siglock */
+       spin_lock(&current->sighand->siglock);
+       list_del(&timer->list);
+       spin_unlock(&current->sighand->siglock);
+       /*
+        * This keeps any tasks waiting on the spin lock from thinking
+        * they got something (see the lock code above).
+        */
+       timer->it_signal = NULL;
+
+       unlock_timer(timer, flags);
+       release_posix_timer(timer, IT_ID_SET);
+       return 0;
+}
+
+/*
+ * Delete a timer owned by the process; used by exit_itimers().
+ *
+ * Unlike the timer_delete syscall this takes the k_itimer directly and
+ * locks it via its it_lock instead of looking it up by id, and it does
+ * not take siglock around the list removal.
+ */
+static void itimer_delete(struct k_itimer *timer)
+{
+       unsigned long flags;
+
+retry_delete:
+       spin_lock_irqsave(&timer->it_lock, flags);
+
+       /* On TIMER_RETRY drop the timer lock and start over */
+       if (timer_delete_hook(timer) == TIMER_RETRY) {
+               unlock_timer(timer, flags);
+               goto retry_delete;
+       }
+       list_del(&timer->list);
+       /*
+        * This keeps any tasks waiting on the spin lock from thinking
+        * they got something (see the lock code above).
+        */
+       timer->it_signal = NULL;
+
+       unlock_timer(timer, flags);
+       release_posix_timer(timer, IT_ID_SET);
+}
+
+/*
+ * Tear down every posix timer still linked to @sig.
+ *
+ * This is called by do_exit or de_thread, only when there are no more
+ * references to the shared signal_struct.
+ */
+void exit_itimers(struct signal_struct *sig)
+{
+       struct list_head *head = &sig->posix_timers;
+
+       /* itimer_delete() unlinks the entry, so the list shrinks each pass */
+       while (!list_empty(head))
+               itimer_delete(list_entry(head->next, struct k_itimer, list));
+}
+
+/*
+ * Set the clock selected by which_clock to the time at tp.
+ *
+ * Returns -EINVAL for an unknown clock or one without a clock_set
+ * callback, -EFAULT when tp cannot be read, otherwise the result of the
+ * clock_set callback.
+ */
+SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
+               const struct timespec __user *, tp)
+{
+       struct timespec ts;
+       struct k_clock *clk = clockid_to_kclock(which_clock);
+
+       if (clk == NULL || clk->clock_set == NULL)
+               return -EINVAL;
+       if (copy_from_user(&ts, tp, sizeof(ts)) != 0)
+               return -EFAULT;
+
+       return clk->clock_set(which_clock, &ts);
+}
+
+/*
+ * Read the clock selected by which_clock and store the result at tp.
+ *
+ * Returns -EINVAL for an unknown clock, the clock_get callback's error
+ * if it fails, -EFAULT when the result cannot be written to tp, and 0
+ * otherwise.
+ */
+SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
+               struct timespec __user *,tp)
+{
+       struct k_clock *clk = clockid_to_kclock(which_clock);
+       struct timespec now;
+       int ret;
+
+       if (clk == NULL)
+               return -EINVAL;
+
+       ret = clk->clock_get(which_clock, &now);
+       if (ret)
+               return ret;
+
+       return copy_to_user(tp, &now, sizeof(now)) ? -EFAULT : 0;
+}
+
+/*
+ * Adjust the clock selected by which_clock with the timex at utx.
+ *
+ * Returns -EINVAL for an unknown clock, -EOPNOTSUPP when the clock has
+ * no clock_adj callback and -EFAULT when utx cannot be read or written.
+ * A negative callback result is returned without copying anything back;
+ * a non-negative one is returned after the timex has been copied out.
+ */
+SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
+               struct timex __user *, utx)
+{
+       struct k_clock *clk = clockid_to_kclock(which_clock);
+       struct timex tx;
+       int ret;
+
+       if (clk == NULL)
+               return -EINVAL;
+       if (clk->clock_adj == NULL)
+               return -EOPNOTSUPP;
+       if (copy_from_user(&tx, utx, sizeof(tx)) != 0)
+               return -EFAULT;
+
+       ret = clk->clock_adj(which_clock, &tx);
+       if (ret < 0)
+               return ret;
+
+       /* Non-negative result: hand the updated timex back to the caller */
+       return copy_to_user(utx, &tx, sizeof(tx)) ? -EFAULT : ret;
+}
+
+/*
+ * Query the resolution of the clock selected by which_clock.
+ *
+ * tp may be NULL, in which case the callback still runs but nothing is
+ * copied out.  Returns -EINVAL for an unknown clock, the clock_getres
+ * callback's error if it fails, -EFAULT when tp cannot be written, and
+ * 0 otherwise.
+ */
+SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
+               struct timespec __user *, tp)
+{
+       struct k_clock *clk = clockid_to_kclock(which_clock);
+       struct timespec res;
+       int ret;
+
+       if (clk == NULL)
+               return -EINVAL;
+
+       ret = clk->clock_getres(which_clock, &res);
+       if (ret != 0 || tp == NULL)
+               return ret;
+
+       return copy_to_user(tp, &res, sizeof(res)) ? -EFAULT : 0;
+}
+
+/*
+ * nanosleep for monotonic and realtime clocks
+ *
+ * Thin wrapper around hrtimer_nanosleep(): TIMER_ABSTIME in flags selects
+ * an absolute sleep, otherwise the request is relative.
+ */
+static int common_nsleep(const clockid_t which_clock, int flags,
+                        struct timespec *tsave, struct timespec __user *rmtp)
+{
+       enum hrtimer_mode mode = HRTIMER_MODE_REL;
+
+       if (flags & TIMER_ABSTIME)
+               mode = HRTIMER_MODE_ABS;
+
+       return hrtimer_nanosleep(tsave, rmtp, mode, which_clock);
+}
+
+/*
+ * Sleep on the clock selected by which_clock for the time given at rqtp.
+ *
+ * Returns -EINVAL for an unknown clock or an invalid request,
+ * -ENANOSLEEP_NOTSUP when the clock has no nsleep callback, -EFAULT when
+ * rqtp cannot be read, otherwise the result of the nsleep callback.
+ */
+SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
+               const struct timespec __user *, rqtp,
+               struct timespec __user *, rmtp)
+{
+       struct timespec req;
+       struct k_clock *clk = clockid_to_kclock(which_clock);
+
+       if (clk == NULL)
+               return -EINVAL;
+       if (clk->nsleep == NULL)
+               return -ENANOSLEEP_NOTSUP;
+       if (copy_from_user(&req, rqtp, sizeof(req)) != 0)
+               return -EFAULT;
+       if (!timespec_valid(&req))
+               return -EINVAL;
+
+       return clk->nsleep(which_clock, flags, &req, rmtp);
+}
+
+/*
+ * Restart an interrupted clock_nanosleep via the nsleep_restart callback
+ * of the clock recorded in the restart block.  This is required only by
+ * compat_clock_nanosleep_restart for now.
+ *
+ * Warns once and returns -EINVAL when the clock or the callback is
+ * missing.
+ */
+long clock_nanosleep_restart(struct restart_block *restart_block)
+{
+       struct k_clock *clk =
+               clockid_to_kclock(restart_block->nanosleep.clockid);
+
+       if (!WARN_ON_ONCE(!clk || !clk->nsleep_restart))
+               return clk->nsleep_restart(restart_block);
+
+       return -EINVAL;
+}
diff --git a/kernel/time/time.c b/kernel/time/time.c
new file mode 100644 (file)
index 0000000..7c7964c
--- /dev/null
@@ -0,0 +1,714 @@
+/*
+ *  linux/kernel/time/time.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  This file contains the interface functions for the various
+ *  time related system calls: time, stime, gettimeofday, settimeofday,
+ *                            adjtime
+ */
+/*
+ * Modification history kernel/time.c
+ *
+ * 1993-09-02    Philip Gladstone
+ *      Created file with time related functions from sched/core.c and adjtimex()
+ * 1993-10-08    Torsten Duwe
+ *      adjtime interface update and CMOS clock write code
+ * 1995-08-13    Torsten Duwe
+ *      kernel PLL updated to 1994-12-13 specs (rfc-1589)
+ * 1999-01-16    Ulrich Windl
+ *     Introduced error checking for many cases in adjtimex().
+ *     Updated NTP code according to technical memorandum Jan '96
+ *     "A Kernel Model for Precision Timekeeping" by Dave Mills
+ *     Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
+ *     (Even though the technical memorandum forbids it)
+ * 2004-07-14   Christoph Lameter
+ *     Added getnstimeofday to allow the posix timer functions to return
+ *     with nanosecond accuracy
+ */
+
+#include <linux/export.h>
+#include <linux/timex.h>
+#include <linux/capability.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/errno.h>
+#include <linux/syscalls.h>
+#include <linux/security.h>
+#include <linux/fs.h>
+#include <linux/math64.h>
+#include <linux/ptrace.h>
+
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+#include "timeconst.h"
+
+/*
+ * The timezone where the local system is located.  Used as a default by some
+ * programs that obtain this value by using gettimeofday.
+ */
+struct timezone sys_tz;
+
+EXPORT_SYMBOL(sys_tz);
+
+#ifdef __ARCH_WANT_SYS_TIME
+
+/*
+ * sys_time() can be implemented in user-level using
+ * sys_gettimeofday().  Is this for backwards compatibility?  If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ */
+SYSCALL_DEFINE1(time, time_t __user *, tloc)
+{
+       time_t now = get_seconds();
+
+       /* Storing through tloc is optional; a NULL pointer is skipped */
+       if (tloc != NULL && put_user(now, tloc))
+               return -EFAULT;
+
+       force_successful_syscall_return();
+       return now;
+}
+
+/*
+ * sys_stime() can be implemented in user-level using
+ * sys_settimeofday().  Is this for backwards compatibility?  If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ */
+
+SYSCALL_DEFINE1(stime, time_t __user *, tptr)
+{
+       struct timespec ts;
+       int ret;
+
+       if (get_user(ts.tv_sec, tptr))
+               return -EFAULT;
+
+       /* stime() only carries whole seconds */
+       ts.tv_nsec = 0;
+
+       ret = security_settime(&ts, NULL);
+       if (!ret)
+               do_settimeofday(&ts);
+
+       return ret;
+}
+
+#endif /* __ARCH_WANT_SYS_TIME */
+
+/*
+ * Copy the current time of day to tv and the system timezone (sys_tz)
+ * to tz.  Either pointer may be NULL, in which case that part is
+ * skipped.  Returns 0, or -EFAULT when a copy to user space fails.
+ */
+SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
+               struct timezone __user *, tz)
+{
+       struct timeval now;
+
+       if (likely(tv != NULL)) {
+               do_gettimeofday(&now);
+               if (copy_to_user(tv, &now, sizeof(now)))
+                       return -EFAULT;
+       }
+
+       if (unlikely(tz != NULL) &&
+           copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
+               return -EFAULT;
+
+       return 0;
+}
+
+/*
+ * Indicates if there is an offset between the system clock and the hardware
+ * clock/persistent clock/rtc.
+ */
+int persistent_clock_is_local;
+
+/*
+ * Adjust the time obtained from the CMOS to be UTC time instead of
+ * local time.
+ *
+ * This is ugly, but preferable to the alternatives.  Otherwise we
+ * would either need to write a program to do it in /etc/rc (and risk
+ * confusion if the program gets run more than once; it would also be
+ * hard to make the program warp the clock precisely n hours)  or
+ * compile in the timezone information into the kernel.  Bad, bad....
+ *
+ *                                             - TYT, 1992-01-01
+ *
+ * The best thing to do is to keep the CMOS clock in universal time (UTC)
+ * as real UNIX machines always do it. This avoids all headaches about
+ * daylight saving times and warping kernel clocks.
+ */
+static inline void warp_clock(void)
+{
+       struct timespec adjust;
+
+       /* A zero offset means there is nothing to warp */
+       if (sys_tz.tz_minuteswest == 0)
+               return;
+
+       persistent_clock_is_local = 1;
+
+       /* Inject the timezone offset, converted from minutes to seconds */
+       adjust.tv_sec = sys_tz.tz_minuteswest * 60;
+       adjust.tv_nsec = 0;
+       timekeeping_inject_offset(&adjust);
+}
+
+/*
+ * In case for some reason the CMOS clock has not already been running
+ * in UTC, but in some local time: The first time we set the timezone,
+ * we will warp the clock so that it is ticking UTC time instead of
+ * local time. Presumably, if someone is setting the timezone then we
+ * are running in an environment where the programs understand about
+ * timezones. This should be done at boot time in the /etc/rc script,
+ * as soon as possible, so that the clock can be set right. Otherwise,
+ * various programs will get confused when the clock gets warped.
+ */
+
+int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
+{
+       static int firsttime = 1;
+       int ret;
+
+       if (tv != NULL && !timespec_valid(tv))
+               return -EINVAL;
+
+       ret = security_settime(tv, tz);
+       if (ret != 0)
+               return ret;
+
+       if (tz != NULL) {
+               sys_tz = *tz;
+               update_vsyscall_tz();
+
+               /*
+                * Only the very first timezone update may warp the clock,
+                * and only when no new time is supplied alongside it.
+                */
+               if (firsttime) {
+                       firsttime = 0;
+                       if (tv == NULL)
+                               warp_clock();
+               }
+       }
+
+       return tv != NULL ? do_settimeofday(tv) : 0;
+}
+
+/*
+ * Set the system time and/or timezone from user space.
+ *
+ * Either pointer may be NULL.  A supplied timeval is converted to a
+ * timespec before being handed to do_sys_settimeofday().
+ *
+ * Returns -EFAULT when tv or tz cannot be read, -EINVAL when tv_usec is
+ * outside [0, USEC_PER_SEC), otherwise the result of
+ * do_sys_settimeofday().
+ */
+SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv,
+               struct timezone __user *, tz)
+{
+       struct timeval user_tv;
+       struct timespec new_ts;
+       struct timezone new_tz;
+
+       if (tv) {
+               if (copy_from_user(&user_tv, tv, sizeof(*tv)))
+                       return -EFAULT;
+
+               /*
+                * Reject out-of-range microseconds before scaling: the
+                * multiplication below could otherwise overflow and wrap
+                * into a value that passes the later timespec_valid()
+                * check in do_sys_settimeofday().
+                */
+               if (user_tv.tv_usec < 0 || user_tv.tv_usec >= USEC_PER_SEC)
+                       return -EINVAL;
+
+               new_ts.tv_sec = user_tv.tv_sec;
+               new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
+       }
+       if (tz) {
+               if (copy_from_user(&new_tz, tz, sizeof(*tz)))
+                       return -EFAULT;
+       }
+
+       return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
+}
+
+/*
+ * Run do_adjtimex() on a kernel copy of the user's timex and copy the
+ * (possibly updated) structure back.  Returns -EFAULT when either copy
+ * fails, otherwise the result of do_adjtimex().
+ */
+SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
+{
+       struct timex ktx;       /* Kernel-side copy of the parameter */
+       int ret;
+
+       /*
+        * Work on a local copy of the user structure, bearing in mind
+        * that the structure layout may change.
+        */
+       if (copy_from_user(&ktx, txc_p, sizeof(struct timex)))
+               return -EFAULT;
+
+       ret = do_adjtimex(&ktx);
+
+       /* The copy-out happens regardless of what do_adjtimex() returned */
+       if (copy_to_user(txc_p, &ktx, sizeof(struct timex)))
+               return -EFAULT;
+
+       return ret;
+}
+
+/**
+ * current_fs_time - Return FS time
+ * @sb: Superblock.
+ *
+ * Takes current_kernel_time() and truncates it with timespec_trunc() to
+ * the time granularity (s_time_gran) recorded in the superblock.
+ */
+struct timespec current_fs_time(struct super_block *sb)
+{
+       return timespec_trunc(current_kernel_time(), sb->s_time_gran);
+}
+EXPORT_SYMBOL(current_fs_time);
+
+/*
+ * Convert jiffies to milliseconds and back.
+ *
+ * Avoid unnecessary multiplications/divisions in the
+ * two most common HZ cases:
+ *
+ * The conversion strategy is picked at compile time from HZ.  The
+ * result is returned as an unsigned int, so it is truncated to that
+ * width for large values of j.
+ */
+unsigned int jiffies_to_msecs(const unsigned long j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+       /* HZ divides MSEC_PER_SEC evenly: one multiplication is enough */
+       return (MSEC_PER_SEC / HZ) * j;
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+       /* HZ is a multiple of MSEC_PER_SEC: divide, rounding upwards */
+       return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
+#else
+       /* Generic case: use the constants from timeconst.h */
+# if BITS_PER_LONG == 32
+       return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
+# else
+       return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
+# endif
+#endif
+}
+EXPORT_SYMBOL(jiffies_to_msecs);
+
+/*
+ * Convert jiffies to microseconds.  Mirrors jiffies_to_msecs(): the
+ * strategy is picked at compile time from HZ, and the unsigned int
+ * return type truncates the result for large values of j.
+ */
+unsigned int jiffies_to_usecs(const unsigned long j)
+{
+#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
+       /* HZ divides USEC_PER_SEC evenly: one multiplication is enough */
+       return (USEC_PER_SEC / HZ) * j;
+#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
+       /* HZ is a multiple of USEC_PER_SEC: divide, rounding upwards */
+       return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC);
+#else
+       /* Generic case: use the constants from timeconst.h */
+# if BITS_PER_LONG == 32
+       return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32;
+# else
+       return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN;
+# endif
+#endif
+}
+EXPORT_SYMBOL(jiffies_to_usecs);
+
+/**
+ * timespec_trunc - Truncate timespec to a granularity
+ * @t: Timespec
+ * @gran: Granularity in ns.
+ *
+ * Rounds the nanosecond part of @t down to a multiple of @gran and
+ * returns the result.  gran must be smaller than a second.
+ *
+ * This function should be only used for timestamps returned by
+ * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because
+ * it doesn't handle the better resolution of the latter.
+ */
+struct timespec timespec_trunc(struct timespec t, unsigned gran)
+{
+       /*
+        * Division is pretty slow so avoid it for common cases.
+        * Currently current_kernel_time() never returns better than
+        * jiffies resolution, so a granularity at or below one jiffy
+        * leaves the value untouched.
+        */
+       if (gran <= jiffies_to_usecs(1) * 1000)
+               return t;
+
+       if (gran == 1000000000)
+               t.tv_nsec = 0;
+       else
+               t.tv_nsec -= t.tv_nsec % gran;
+
+       return t;
+}
+EXPORT_SYMBOL(timespec_trunc);
+
+/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+ * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
+ *
+ * year0 and mon0 are copied into locals because the month/year pair is
+ * shifted so that the year starts in March (see below); the remaining
+ * arguments are used as passed in.
+ *
+ * [For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.]
+ *
+ * This algorithm was first published by Gauss (I think).
+ *
+ * WARNING: this function will overflow on 2106-02-07 06:28:16 on
+ * machines where long is 32-bit! (However, as time_t is signed, we
+ * will already get problems at other places on 2038-01-19 03:14:08)
+ */
+unsigned long
+mktime(const unsigned int year0, const unsigned int mon0,
+       const unsigned int day, const unsigned int hour,
+       const unsigned int min, const unsigned int sec)
+{
+       unsigned int mon = mon0, year = year0;
+
+       /* 1..12 -> 11,12,1..10 */
+       if (0 >= (int) (mon -= 2)) {
+               mon += 12;      /* Puts Feb last since it has leap day */
+               year -= 1;
+       }
+
+       /* Days since the epoch first, then scaled up step by step */
+       return ((((unsigned long)
+                 (year/4 - year/100 + year/400 + 367*mon/12 + day) +
+                 year*365 - 719499
+           )*24 + hour /* now have hours */
+         )*60 + min /* now have minutes */
+       )*60 + sec; /* finally seconds */
+}
+
+EXPORT_SYMBOL(mktime);
+
+/**
+ * set_normalized_timespec - set timespec sec and nsec parts and normalize
+ *
+ * @ts:                pointer to timespec variable to be set
+ * @sec:       seconds to set
+ * @nsec:      nanoseconds to set
+ *
+ * Set seconds and nanoseconds field of a timespec variable and
+ * normalize to the timespec storage format
+ *
+ * Normalization is done by repeatedly moving whole seconds between
+ * @nsec and @sec, one NSEC_PER_SEC step per loop iteration.
+ *
+ * Note: The tv_nsec part is always in the range of
+ *     0 <= tv_nsec < NSEC_PER_SEC
+ * For negative values only the tv_sec field is negative !
+ */
+void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
+{
+       while (nsec >= NSEC_PER_SEC) {
+               /*
+                * The following asm() prevents the compiler from
+                * optimising this loop into a modulo operation. See
+                * also __iter_div_u64_rem() in include/linux/time.h
+                */
+               asm("" : "+rm"(nsec));
+               nsec -= NSEC_PER_SEC;
+               ++sec;
+       }
+       while (nsec < 0) {
+               /* Same empty asm() as above, for the negative direction */
+               asm("" : "+rm"(nsec));
+               nsec += NSEC_PER_SEC;
+               --sec;
+       }
+       ts->tv_sec = sec;
+       ts->tv_nsec = nsec;
+}
+EXPORT_SYMBOL(set_normalized_timespec);
+
+/**
+ * ns_to_timespec - Convert nanoseconds to timespec
+ * @nsec:       the nanoseconds value to be converted
+ *
+ * Splits @nsec into seconds and a nanosecond remainder.  A negative
+ * remainder is folded back into the seconds so that the returned
+ * tv_nsec is never negative.
+ */
+struct timespec ns_to_timespec(const s64 nsec)
+{
+       struct timespec result;
+       s32 frac;
+
+       if (nsec == 0)
+               return (struct timespec) {0, 0};
+
+       result.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &frac);
+
+       /* Borrow one second when the remainder came out negative */
+       if (unlikely(frac < 0)) {
+               frac += NSEC_PER_SEC;
+               result.tv_sec--;
+       }
+       result.tv_nsec = frac;
+
+       return result;
+}
+EXPORT_SYMBOL(ns_to_timespec);
+
+/**
+ * ns_to_timeval - Convert nanoseconds to timeval
+ * @nsec:       the nanoseconds value to be converted
+ *
+ * Goes through ns_to_timespec() and scales the nanosecond part down to
+ * microseconds by integer division.
+ */
+struct timeval ns_to_timeval(const s64 nsec)
+{
+       const struct timespec ts = ns_to_timespec(nsec);
+       struct timeval tv = {
+               .tv_sec = ts.tv_sec,
+               .tv_usec = (suseconds_t) ts.tv_nsec / 1000,
+       };
+
+       return tv;
+}
+EXPORT_SYMBOL(ns_to_timeval);
+
+/*
+ * When we convert to jiffies then we interpret incoming values
+ * the following way:
+ *
+ * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
+ *
+ * - 'too large' values [that would result in larger than
+ *   MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
+ *
+ * - all other values are converted to jiffies by either multiplying
+ *   the input value by a factor or dividing it with a factor
+ *
+ * We must also be careful about 32-bit overflows.
+ *
+ * The branch used for the conversion is selected at compile time from
+ * the relation between HZ and MSEC_PER_SEC.
+ */
+unsigned long msecs_to_jiffies(const unsigned int m)
+{
+       /*
+        * Negative value, means infinite timeout:
+        */
+       if ((int)m < 0)
+               return MAX_JIFFY_OFFSET;
+
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+       /*
+        * HZ is equal to or smaller than 1000, and 1000 is a nice
+        * round multiple of HZ, divide with the factor between them,
+        * but round upwards:
+        */
+       return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+       /*
+        * HZ is larger than 1000, and HZ is a nice round multiple of
+        * 1000 - simply multiply with the factor between them.
+        *
+        * But first make sure the multiplication result cannot
+        * overflow:
+        */
+       if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+               return MAX_JIFFY_OFFSET;
+
+       return m * (HZ / MSEC_PER_SEC);
+#else
+       /*
+        * Generic case - multiply, round and divide. But first
+        * check that if we are doing a net multiplication, that
+        * we wouldn't overflow:
+        */
+       if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+               return MAX_JIFFY_OFFSET;
+
+       /* Multiply, add the adjustment and shift (timeconst.h constants) */
+       return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
+               >> MSEC_TO_HZ_SHR32;
+#endif
+}
+EXPORT_SYMBOL(msecs_to_jiffies);
+
+/*
+ * Convert microseconds to jiffies.  Inputs larger than what
+ * MAX_JIFFY_OFFSET jiffies amount to in microseconds return
+ * MAX_JIFFY_OFFSET.  The conversion branch is selected at compile
+ * time from the relation between HZ and USEC_PER_SEC.
+ */
+unsigned long usecs_to_jiffies(const unsigned int u)
+{
+       if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
+               return MAX_JIFFY_OFFSET;
+#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
+       /* HZ divides USEC_PER_SEC evenly: divide, rounding upwards */
+       return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
+#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
+       /* HZ is a multiple of USEC_PER_SEC: plain multiplication */
+       return u * (HZ / USEC_PER_SEC);
+#else
+       /* Generic case: multiply, adjust and shift (timeconst.h constants) */
+       return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
+               >> USEC_TO_HZ_SHR32;
+#endif
+}
+EXPORT_SYMBOL(usecs_to_jiffies);
+
+/*
+ * The TICK_NSEC - 1 rounds up the value to the next resolution.  Note
+ * that a remainder subtract here would not do the right thing as the
+ * resolution values don't fall on second boundaries.  I.e. the line:
+ * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
+ *
+ * Rather, we just shift the bits off the right.
+ *
+ * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
+ * value to a scaled second value.
+ */
+unsigned long
+timespec_to_jiffies(const struct timespec *value)
+{
+       unsigned long sec = value->tv_sec;
+       long nsec = value->tv_nsec + TICK_NSEC - 1;
+
+       /* Saturate: at or beyond MAX_SEC_IN_JIFFIES the nsec part is dropped */
+       if (sec >= MAX_SEC_IN_JIFFIES){
+               sec = MAX_SEC_IN_JIFFIES;
+               nsec = 0;
+       }
+       return (((u64)sec * SEC_CONVERSION) +
+               (((u64)nsec * NSEC_CONVERSION) >>
+                (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
+
+}
+EXPORT_SYMBOL(timespec_to_jiffies);
+
+/*
+ * Convert a jiffies count into a timespec stored at @value.
+ */
+void
+jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
+{
+       /* Total nanoseconds, computed in 64 bits */
+       const u64 total_ns = (u64)jiffies * TICK_NSEC;
+       u32 frac_ns;
+
+       /* A single divide yields both the seconds and the remainder */
+       value->tv_sec = div_u64_rem(total_ns, NSEC_PER_SEC, &frac_ns);
+       value->tv_nsec = frac_ns;
+}
+EXPORT_SYMBOL(jiffies_to_timespec);
+
+/* Same for "timeval"
+ *
+ * Well, almost.  The problem here is that the real system resolution is
+ * in nanoseconds and the value being converted is in microseconds.
+ * Also for some machines (those that use HZ = 1024, in particular),
+ * there is a LARGE error in the tick size in microseconds.
+ *
+ * The solution we use is to do the rounding AFTER we convert the
+ * microsecond part.  Thus the USEC_ROUND, the bits to be shifted off.
+ * Instruction wise, this should cost only an additional add with carry
+ * instruction above the way it was done above.
+ */
+unsigned long
+timeval_to_jiffies(const struct timeval *value)
+{
+       unsigned long sec = value->tv_sec;
+       long usec = value->tv_usec;
+
+       /* Saturate: at or beyond MAX_SEC_IN_JIFFIES the usec part is dropped */
+       if (sec >= MAX_SEC_IN_JIFFIES){
+               sec = MAX_SEC_IN_JIFFIES;
+               usec = 0;
+       }
+       return (((u64)sec * SEC_CONVERSION) +
+               (((u64)usec * USEC_CONVERSION + USEC_ROUND) >>
+                (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
+}
+EXPORT_SYMBOL(timeval_to_jiffies);
+
+/*
+ * Convert a jiffies count into a timeval stored at @value.  The
+ * sub-second part is scaled from nanoseconds down to microseconds by
+ * integer division.
+ */
+void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
+{
+       /* Total nanoseconds, computed in 64 bits */
+       const u64 total_ns = (u64)jiffies * TICK_NSEC;
+       u32 frac_ns;
+
+       /* A single divide yields both the seconds and the remainder */
+       value->tv_sec = div_u64_rem(total_ns, NSEC_PER_SEC, &frac_ns);
+       value->tv_usec = frac_ns / NSEC_PER_USEC;
+}
+EXPORT_SYMBOL(jiffies_to_timeval);
+
+/*
+ * Convert jiffies/jiffies_64 to clock_t and back.
+ *
+ * jiffies_to_clock_t() rescales a jiffies count from HZ to USER_HZ
+ * units.  The branch is chosen at compile time.
+ */
+clock_t jiffies_to_clock_t(unsigned long x)
+{
+#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
+       /* A tick is a whole number of USER_HZ periods: integer scaling */
+# if HZ < USER_HZ
+       return x * (USER_HZ / HZ);
+# else
+       return x / (HZ / USER_HZ);
+# endif
+#else
+       /* Otherwise go through nanoseconds with a 64-bit divide */
+       return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ);
+#endif
+}
+EXPORT_SYMBOL(jiffies_to_clock_t);
+
+/*
+ * Convert a USER_HZ based clock_t value to jiffies.  Inputs that would
+ * not fit are reported as ~0UL instead of being allowed to overflow.
+ */
+unsigned long clock_t_to_jiffies(unsigned long x)
+{
+#if (HZ % USER_HZ)==0
+       /* HZ is a multiple of USER_HZ: saturate, then multiply */
+       if (x >= ~0UL / (HZ / USER_HZ))
+               return ~0UL;
+       return x * (HZ / USER_HZ);
+#else
+       /* Don't worry about loss of precision here .. */
+       if (x >= ~0UL / HZ * USER_HZ)
+               return ~0UL;
+
+       /* .. but do try to contain it here */
+       return div_u64((u64)x * HZ, USER_HZ);
+#endif
+}
+EXPORT_SYMBOL(clock_t_to_jiffies);
+
+/*
+ * 64-bit variant of jiffies_to_clock_t(): rescale a jiffies_64 count
+ * from HZ to USER_HZ units.  The branch is chosen at compile time.
+ */
+u64 jiffies_64_to_clock_t(u64 x)
+{
+#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
+# if HZ < USER_HZ
+       x = div_u64(x * USER_HZ, HZ);
+# elif HZ > USER_HZ
+       x = div_u64(x, HZ / USER_HZ);
+# else
+       /* Nothing to do */
+# endif
+#else
+       /*
+        * There are better ways that don't overflow early,
+        * but even this doesn't overflow in hundreds of years
+        * in 64 bits, so..
+        */
+       x = div_u64(x * TICK_NSEC, (NSEC_PER_SEC / USER_HZ));
+#endif
+       return x;
+}
+EXPORT_SYMBOL(jiffies_64_to_clock_t);
+
+/*
+ * Convert nanoseconds to USER_HZ based clock_t units.  The branch is
+ * chosen at compile time from USER_HZ.
+ */
+u64 nsec_to_clock_t(u64 x)
+{
+#if (NSEC_PER_SEC % USER_HZ) == 0
+       /* USER_HZ divides NSEC_PER_SEC evenly: one exact divide */
+       return div_u64(x, NSEC_PER_SEC / USER_HZ);
+#elif (USER_HZ % 512) == 0
+       /* Factor 512 out of both sides before dividing */
+       return div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512);
+#else
+       /*
+        * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
+        * overflow after 64.99 years.
+        * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
+        */
+       return div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ);
+#endif
+}
+
+/**
+ * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
+ *
+ * @n: nsecs in u64
+ *
+ * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
+ * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
+ * for scheduler, not for use in device drivers to calculate timeout value.
+ *
+ * The conversion branch is chosen at compile time from HZ; the result
+ * is returned as a u64.
+ *
+ * note:
+ *   NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
+ *   ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
+ */
+u64 nsecs_to_jiffies64(u64 n)
+{
+#if (NSEC_PER_SEC % HZ) == 0
+       /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
+       return div_u64(n, NSEC_PER_SEC / HZ);
+#elif (HZ % 512) == 0
+       /* overflow after 292 years if HZ = 1024 */
+       return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
+#else
+       /*
+        * Generic case - optimized for cases where HZ is a multiple of 3.
+        * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
+        */
+       return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
+#endif
+}
+
+/**
+ * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ *
+ * @n: nsecs in u64
+ *
+ * Wrapper around nsecs_to_jiffies64() that casts the 64-bit result to
+ * unsigned long.
+ *
+ * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
+ * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
+ * for scheduler, not for use in device drivers to calculate timeout value.
+ *
+ * note:
+ *   NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
+ *   ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
+ */
+unsigned long nsecs_to_jiffies(u64 n)
+{
+       const u64 j64 = nsecs_to_jiffies64(n);
+
+       return (unsigned long)j64;
+}
+
+/*
+ * Add two timespec values and do a safety check for overflow.
+ * It's assumed that both values are valid (>= 0)
+ *
+ * When the normalized sum's seconds end up below either operand's
+ * seconds, the result's tv_sec is pinned to TIME_T_MAX.
+ */
+struct timespec timespec_add_safe(const struct timespec lhs,
+                                 const struct timespec rhs)
+{
+       struct timespec sum;
+       int wrapped;
+
+       set_normalized_timespec(&sum, lhs.tv_sec + rhs.tv_sec,
+                               lhs.tv_nsec + rhs.tv_nsec);
+
+       wrapped = sum.tv_sec < lhs.tv_sec || sum.tv_sec < rhs.tv_sec;
+       if (wrapped)
+               sum.tv_sec = TIME_T_MAX;
+
+       return sum;
+}
diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
new file mode 100644 (file)
index 0000000..511bdf2
--- /dev/null
@@ -0,0 +1,108 @@
+scale=0
+
+/* Greatest common divisor of a and b, computed by repeated remainder. */
+define gcd(a,b) {
+       auto r;
+       while (b != 0) {
+               r = a % b;
+               a = b;
+               b = r;
+       }
+       return a;
+}
+
+/* Division by reciprocal multiplication: the b-bit scaled factor
+   2^b*n/d, rounded up by adding d-1 before the integer divide. */
+define fmul(b,n,d) {
+       auto num;
+       num = 2^b*n + d - 1;
+       return num/d;
+}
+
+/* Adjustment factor when a ceiling value is used.  Use as:
+   (imul * n) + ((fmulxx * n + fadjxx) >> xx)
+   The denominator is first reduced by gcd(n,d). */
+define fadj(b,n,d) {
+       auto r;
+       r = d/gcd(n,d);
+       return 2^b*(r-1)/r;
+}
+
+/* Compute the appropriate mul/adj values as well as a shift count,
+   which brings the mul value into the range 2^(b-1) <= x < 2^b.  Such
+   a shift value will be correct in the signed integer range and off
+   by at most one in the upper half of the unsigned range.
+   Returns the smallest shift s for which fmul(s,n,d) >= 2^(b-1). */
+define fmuls(b,n,d) {
+       auto s, m;
+       for (s = 0; 1; s++) {
+               m = fmul(s,n,d);
+               if (m >= 2^(b-1))
+                       return s;
+       }
+       return 0;
+}
+
+/* Print the C header with the HZ <-> msec/usec conversion constants
+   for the given hz, then stop bc via halt.  The paths embedded in the
+   generated text name this script and the header at their locations
+   under kernel/time/. */
+define timeconst(hz) {
+       print "/* Automatically generated by kernel/time/timeconst.bc */\n"
+       print "/* Time conversion constants for HZ == ", hz, " */\n"
+       print "\n"
+
+       print "#ifndef KERNEL_TIMECONST_H\n"
+       print "#define KERNEL_TIMECONST_H\n\n"
+
+       print "#include <linux/param.h>\n"
+       print "#include <linux/types.h>\n\n"
+
+       print "#if HZ != ", hz, "\n"
+       print "#error \qkernel/time/timeconst.h has the wrong HZ value!\q\n"
+       print "#endif\n\n"
+
+       if (hz < 2) {
+               print "#error Totally bogus HZ value!\n"
+       } else {
+               /* jiffies -> msecs, 32-bit multiply/shift form */
+               s=fmuls(32,1000,hz)
+               obase=16
+               print "#define HZ_TO_MSEC_MUL32\tU64_C(0x", fmul(s,1000,hz), ")\n"
+               print "#define HZ_TO_MSEC_ADJ32\tU64_C(0x", fadj(s,1000,hz), ")\n"
+               obase=10
+               print "#define HZ_TO_MSEC_SHR32\t", s, "\n"
+
+               /* msecs -> jiffies, 32-bit multiply/shift form */
+               s=fmuls(32,hz,1000)
+               obase=16
+               print "#define MSEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000), ")\n"
+               print "#define MSEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000), ")\n"
+               obase=10
+               print "#define MSEC_TO_HZ_SHR32\t", s, "\n"
+
+               /* Reduced numerator/denominator pairs for msecs */
+               obase=10
+               cd=gcd(hz,1000)
+               print "#define HZ_TO_MSEC_NUM\t\t", 1000/cd, "\n"
+               print "#define HZ_TO_MSEC_DEN\t\t", hz/cd, "\n"
+               print "#define MSEC_TO_HZ_NUM\t\t", hz/cd, "\n"
+               print "#define MSEC_TO_HZ_DEN\t\t", 1000/cd, "\n"
+               print "\n"
+
+               /* jiffies -> usecs, 32-bit multiply/shift form */
+               s=fmuls(32,1000000,hz)
+               obase=16
+               print "#define HZ_TO_USEC_MUL32\tU64_C(0x", fmul(s,1000000,hz), ")\n"
+               print "#define HZ_TO_USEC_ADJ32\tU64_C(0x", fadj(s,1000000,hz), ")\n"
+               obase=10
+               print "#define HZ_TO_USEC_SHR32\t", s, "\n"
+
+               /* usecs -> jiffies, 32-bit multiply/shift form */
+               s=fmuls(32,hz,1000000)
+               obase=16
+               print "#define USEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000000), ")\n"
+               print "#define USEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000000), ")\n"
+               obase=10
+               print "#define USEC_TO_HZ_SHR32\t", s, "\n"
+
+               /* Reduced numerator/denominator pairs for usecs */
+               obase=10
+               cd=gcd(hz,1000000)
+               print "#define HZ_TO_USEC_NUM\t\t", 1000000/cd, "\n"
+               print "#define HZ_TO_USEC_DEN\t\t", hz/cd, "\n"
+               print "#define USEC_TO_HZ_NUM\t\t", hz/cd, "\n"
+               print "#define USEC_TO_HZ_DEN\t\t", 1000000/cd, "\n"
+               print "\n"
+
+               print "#endif /* KERNEL_TIMECONST_H */\n"
+       }
+       halt
+}
+
+timeconst(hz)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
new file mode 100644 (file)
index 0000000..3bb01a3
--- /dev/null
@@ -0,0 +1,1734 @@
+/*
+ *  linux/kernel/time/timer.c
+ *
+ *  Kernel internal timers
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
+ *
+ *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
+ *              "A Kernel Model for Precision Timekeeping" by Dave Mills
+ *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ *              serialize accesses to xtime/lost_ticks).
+ *                              Copyright (C) 1998  Andrea Arcangeli
+ *  1999-03-10  Improved NTP compatibility by Ulrich Windl
+ *  2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
+ *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
+ *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
+ *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/pid_namespace.h>
+#include <linux/notifier.h>
+#include <linux/thread_info.h>
+#include <linux/time.h>
+#include <linux/jiffies.h>
+#include <linux/posix-timers.h>
+#include <linux/cpu.h>
+#include <linux/syscalls.h>
+#include <linux/delay.h>
+#include <linux/tick.h>
+#include <linux/kallsyms.h>
+#include <linux/irq_work.h>
+#include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/slab.h>
+#include <linux/compat.h>
+
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/div64.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/timer.h>
+
+__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
+
+EXPORT_SYMBOL(jiffies_64);     /* tick counter is usable from modules */
+
+/*
+ * per-CPU timer vector definitions:
+ *
+ * TVR_BITS is the index width of the root vector (tv1), TVN_BITS the index
+ * width of each of the four outer vectors (tv2..tv5); CONFIG_BASE_SMALL
+ * selects the smaller variant.  The *_SIZE macros are the resulting slot
+ * counts and the *_MASK macros extract a slot index.  MAX_TVAL is the
+ * largest offset that the five vectors together can represent.
+ */
+#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
+#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
+
+struct tvec {
+       struct list_head vec[TVN_SIZE]; /* one timer list per slot */
+};
+
+struct tvec_root {
+       struct list_head vec[TVR_SIZE]; /* one timer list per slot */
+};
+
+struct tvec_base {
+       spinlock_t lock;                /* protects the base and its timers */
+       struct timer_list *running_timer; /* timer whose handler is running */
+       unsigned long timer_jiffies;    /* reference time for slot selection */
+       unsigned long next_timer;       /* hint: earliest non-deferrable expiry */
+       unsigned long active_timers;    /* queued non-deferrable timers */
+       unsigned long all_timers;       /* all queued timers */
+       struct tvec_root tv1;           /* expiry less than TVR_SIZE ticks away */
+       struct tvec tv2;                /* tv2..tv5: progressively later expiry */
+       struct tvec tv3;
+       struct tvec tv4;
+       struct tvec tv5;
+} ____cacheline_aligned;
+
+struct tvec_base boot_tvec_bases;      /* initial base of every CPU */
+EXPORT_SYMBOL(boot_tvec_bases);
+static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
+
+/*
+ * A timer's ->base value carries flag bits next to the pointer.  The
+ * helpers below pick the individual flags or the plain pointer out of it.
+ *
+ * tbase_get_deferrable() returns non-zero if TIMER_DEFERRABLE is set.
+ */
+static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
+{
+       unsigned long word = (unsigned long)base;
+
+       return (unsigned int)word & TIMER_DEFERRABLE;
+}
+
+/* Returns non-zero if TIMER_IRQSAFE is set in the ->base value. */
+static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
+{
+       unsigned long word = (unsigned long)base;
+
+       return (unsigned int)word & TIMER_IRQSAFE;
+}
+
+/* Strip the flag bits and return the plain tvec_base pointer. */
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
+{
+       unsigned long addr = (unsigned long)base;
+
+       addr &= ~TIMER_FLAG_MASK;
+       return (struct tvec_base *)addr;
+}
+
+/* Point @timer at @new_base while keeping the flag bits it already has. */
+static inline void
+timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
+{
+       unsigned long keep = (unsigned long)timer->base;
+
+       keep &= TIMER_FLAG_MASK;
+       timer->base = (struct tvec_base *)((unsigned long)(new_base) | keep);
+}
+
+/*
+ * Worker behind the round_jiffies*() family: round the absolute time @j
+ * to a whole second for @cpu.  With @force_up the result is never earlier
+ * than @j.  If the rounded time is not in the future, @j is returned as is.
+ */
+static unsigned long round_jiffies_common(unsigned long j, int cpu,
+               bool force_up)
+{
+       const unsigned long requested = j;
+       int rem;
+
+       /*
+        * Skew every CPU by 3 jiffies per CPU number (a value inherited
+        * from the mm/ code) so that the CPUs do not all fire their timers
+        * at once and hit the same locks or cachelines.  The skew is added
+        * before rounding and taken off again afterwards.
+        */
+       j += cpu * 3;
+       rem = j % HZ;
+
+       /*
+        * Drop the sub-second part.  A target just after a whole second
+        * (timer irq delays, long irq-off sections, ...) is rounded down;
+        * a quarter second is the cutoff.  Everything else, and every
+        * @force_up request, is moved to the next whole second.
+        */
+       j -= rem;
+       if (force_up || rem >= HZ/4)
+               j += HZ;
+
+       /* undo the per-CPU skew */
+       j -= cpu * 3;
+
+       /* Only hand out the rounded value if it is still in the future. */
+       if (time_is_after_jiffies(j))
+               return j;
+
+       return requested;
+}
+
+/**
+ * __round_jiffies - function to round jiffies to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * __round_jiffies() rounds an absolute time in the future (in jiffies)
+ * up or down to (approximately) full seconds. This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The exact rounding is skewed for each processor to avoid all
+ * processors firing at the exact same time, which could lead
+ * to lock contention or spurious cache line bouncing.
+ *
+ * The return value is the rounded version of the @j parameter, or @j
+ * itself if the rounded value would no longer lie in the future.
+ */
+unsigned long __round_jiffies(unsigned long j, int cpu)
+{
+       return round_jiffies_common(j, cpu, false);
+}
+EXPORT_SYMBOL_GPL(__round_jiffies);
+
+/**
+ * __round_jiffies_relative - function to round jiffies to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * Relative counterpart of __round_jiffies(): @j is a delta from now, and
+ * the result is again a delta.  The delta is rounded up or down so that
+ * the resulting expiry falls on (approximately) a full second.  Timers
+ * that only need to fire roughly every X seconds then expire together,
+ * so the CPU wakes up less often, which saves power.
+ *
+ * The rounding is skewed per processor so that not all processors fire
+ * at the very same time, which could cause lock contention or cache
+ * line bouncing.
+ *
+ * The return value is the rounded version of the @j parameter.
+ */
+unsigned long __round_jiffies_relative(unsigned long j, int cpu)
+{
+       /* Sample jiffies once; it may advance while we run. */
+       unsigned long now = jiffies;
+       unsigned long rounded;
+
+       rounded = round_jiffies_common(j + now, cpu, false);
+
+       return rounded - now;
+}
+EXPORT_SYMBOL_GPL(__round_jiffies_relative);
+
+/**
+ * round_jiffies - function to round jiffies to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ *
+ * Rounds an absolute time in the future (in jiffies) up or down to
+ * (approximately) a full second, using the skew of the CPU this code is
+ * running on.  Intended for timers whose exact expiry does not matter
+ * much as long as they fire approximately every X seconds: such timers
+ * then expire together, the CPU wakes up less and power is saved.
+ *
+ * The return value is the rounded version of the @j parameter.
+ */
+unsigned long round_jiffies(unsigned long j)
+{
+       int cpu = raw_smp_processor_id();
+
+       return round_jiffies_common(j, cpu, false);
+}
+EXPORT_SYMBOL_GPL(round_jiffies);
+
+/**
+ * round_jiffies_relative - function to round jiffies to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ *
+ * Rounds a time delta in the future (in jiffies) up or down so that the
+ * resulting expiry falls on (approximately) a full second, using the
+ * skew of the CPU this code is running on.  Intended for timers whose
+ * exact expiry does not matter much as long as they fire approximately
+ * every X seconds: such timers then expire together, the CPU wakes up
+ * less and power is saved.
+ *
+ * The return value is the rounded version of the @j parameter.
+ */
+unsigned long round_jiffies_relative(unsigned long j)
+{
+       int cpu = raw_smp_processor_id();
+
+       return __round_jiffies_relative(j, cpu);
+}
+EXPORT_SYMBOL_GPL(round_jiffies_relative);
+
+/**
+ * __round_jiffies_up - function to round jiffies up to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * This is the same as __round_jiffies() except that it will never
+ * round down.  This is useful for timeouts for which the exact time
+ * of firing does not matter too much, as long as they don't fire too
+ * early.
+ *
+ * The return value is the rounded version of the @j parameter.
+ */
+unsigned long __round_jiffies_up(unsigned long j, int cpu)
+{
+       return round_jiffies_common(j, cpu, true);
+}
+EXPORT_SYMBOL_GPL(__round_jiffies_up);
+
+/**
+ * __round_jiffies_up_relative - function to round jiffies up to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * Behaves like __round_jiffies_relative() but never rounds down.  Use it
+ * for timeouts whose exact expiry does not matter much, provided they do
+ * not fire too early.
+ */
+unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
+{
+       /* Sample jiffies once; it may advance while we run. */
+       unsigned long now = jiffies;
+       unsigned long rounded;
+
+       rounded = round_jiffies_common(j + now, cpu, true);
+
+       return rounded - now;
+}
+EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
+
+/**
+ * round_jiffies_up - function to round jiffies up to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ *
+ * Behaves like round_jiffies() but never rounds down.  Use it for
+ * timeouts whose exact expiry does not matter much, provided they do
+ * not fire too early.
+ */
+unsigned long round_jiffies_up(unsigned long j)
+{
+       int cpu = raw_smp_processor_id();
+
+       return round_jiffies_common(j, cpu, true);
+}
+EXPORT_SYMBOL_GPL(round_jiffies_up);
+
+/**
+ * round_jiffies_up_relative - function to round jiffies up to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ *
+ * Behaves like round_jiffies_relative() but never rounds down.  Use it
+ * for timeouts whose exact expiry does not matter much, provided they do
+ * not fire too early.
+ */
+unsigned long round_jiffies_up_relative(unsigned long j)
+{
+       int cpu = raw_smp_processor_id();
+
+       return __round_jiffies_up_relative(j, cpu);
+}
+EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
+
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @timer: the timer to be modified
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between
+ * the time mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used
+ * instead: mod_timer() then allows 1/256th of the delay, and no slack
+ * at all for delays shorter than 256 jiffies.
+ *
+ * The value is only stored here; it takes effect the next time the
+ * timer is (re)armed through mod_timer().
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+       timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
+/*
+ * With no timer queued at all, ->timer_jiffies can simply be moved
+ * forward to the current jiffies.  Returns true if that happened, false
+ * if timers are queued and nothing was changed.
+ *
+ * The caller must hold the tvec_base lock.
+ */
+static bool catchup_timer_jiffies(struct tvec_base *base)
+{
+       bool wheel_empty = !base->all_timers;
+
+       if (wheel_empty)
+               base->timer_jiffies = jiffies;
+
+       return wheel_empty;
+}
+
+/*
+ * Queue @timer on the list of @base that matches its expiry.  The
+ * distance between ->expires and base->timer_jiffies selects the vector
+ * (tv1 for the nearest expiries, tv5 for the farthest); bits of ->expires
+ * select the slot within that vector.  No counters are touched here.
+ */
+static void
+__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+{
+       unsigned long expires = timer->expires;
+       unsigned long delta = expires - base->timer_jiffies;
+       struct list_head *bucket;
+       int slot;
+
+       if (delta < TVR_SIZE) {
+               slot = expires & TVR_MASK;
+               bucket = &base->tv1.vec[slot];
+       } else if (delta < 1 << (TVR_BITS + TVN_BITS)) {
+               slot = (expires >> TVR_BITS) & TVN_MASK;
+               bucket = &base->tv2.vec[slot];
+       } else if (delta < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+               slot = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+               bucket = &base->tv3.vec[slot];
+       } else if (delta < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+               slot = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+               bucket = &base->tv4.vec[slot];
+       } else if ((signed long) delta < 0) {
+               /*
+                * The expiry lies behind ->timer_jiffies, i.e. the timer
+                * was set to go off in the past (or right now).  Queue it
+                * in the tv1 slot that ->timer_jiffies currently selects.
+                */
+               slot = base->timer_jiffies & TVR_MASK;
+               bucket = &base->tv1.vec[slot];
+       } else {
+               /*
+                * Timeouts beyond MAX_TVAL (possible on 64-bit
+                * architectures or with CONFIG_BASE_SMALL=1) are capped
+                * to the maximum timeout for the purpose of slot choice.
+                */
+               if (delta > MAX_TVAL) {
+                       delta = MAX_TVAL;
+                       expires = delta + base->timer_jiffies;
+               }
+               slot = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+               bucket = &base->tv5.vec[slot];
+       }
+
+       /* Timers sharing a slot stay in FIFO order: append at the tail. */
+       list_add_tail(&timer->entry, bucket);
+}
+
+/*
+ * Queue @timer on @base and keep the base's bookkeeping in sync:
+ * ->all_timers counts every queued timer, ->active_timers and
+ * ->next_timer only the non-deferrable ones.
+ */
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+{
+       (void)catchup_timer_jiffies(base);
+       __internal_add_timer(base, timer);
+
+       if (!tbase_get_deferrable(timer->base)) {
+               bool first_active = !base->active_timers++;
+
+               /* First active timer, or one that expires earlier. */
+               if (first_active ||
+                   time_before(timer->expires, base->next_timer))
+                       base->next_timer = timer->expires;
+       }
+       base->all_timers++;
+}
+
+#ifdef CONFIG_TIMER_STATS
+/*
+ * Record who started @timer (call site @addr plus comm and pid of the
+ * current task) for the timer statistics.  Nothing is recorded if a start
+ * site is already set.
+ */
+void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
+{
+       if (!timer->start_site) {
+               timer->start_site = addr;
+               memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+               timer->start_pid = current->pid;
+       }
+}
+
+/*
+ * Feed @timer into the timer statistics, flagged as deferrable where
+ * applicable.  Timers without recorded start site are not accounted.
+ */
+static void timer_stats_account_timer(struct timer_list *timer)
+{
+       unsigned int flag;
+
+       if (likely(!timer->start_site))
+               return;
+
+       flag = unlikely(tbase_get_deferrable(timer->base)) ?
+               TIMER_STATS_FLAG_DEFERRABLE : 0;
+
+       timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+                                timer->function, timer->start_comm, flag);
+}
+
+#else /* !CONFIG_TIMER_STATS: accounting is a no-op */
+static void timer_stats_account_timer(struct timer_list *timer) {}
+#endif /* CONFIG_TIMER_STATS */
+
+#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
+
+static struct debug_obj_descr timer_debug_descr;
+
+/* debugobjects hint for a timer: its callback function. */
+static void *timer_debug_hint(void *addr)
+{
+       struct timer_list *timer = addr;
+
+       return timer->function;
+}
+
+/*
+ * fixup_init is called when:
+ * - an active object is initialized
+ *
+ * The still active timer is deleted synchronously and then initialized
+ * again in the object tracker.  Returns 1 if that was done, 0 otherwise.
+ */
+static int timer_fixup_init(void *addr, enum debug_obj_state state)
+{
+       struct timer_list *timer = addr;
+
+       if (state != ODEBUG_STATE_ACTIVE)
+               return 0;
+
+       del_timer_sync(timer);
+       debug_object_init(timer, &timer_debug_descr);
+       return 1;
+}
+
+/*
+ * Stub timer callback for improperly used timers.  The debugobjects fixup
+ * handlers install it on timers that were never set up; if it ever runs
+ * it only emits a warning.  @data is unused.
+ */
+static void stub_timer(unsigned long data)
+{
+       WARN_ON(1);
+}
+
+/*
+ * fixup_activate is called when:
+ * - an active object is activated
+ * - an unknown object is activated (might be a statically initialized object)
+ *
+ * Returns 1 if the timer had to be repaired (the stub callback was
+ * installed), 0 otherwise.
+ */
+static int timer_fixup_activate(void *addr, enum debug_obj_state state)
+{
+       struct timer_list *timer = addr;
+
+       switch (state) {
+
+       case ODEBUG_STATE_NOTAVAILABLE:
+               /*
+                * This is not really a fixup. The timer was
+                * statically initialized. We just make sure that it
+                * is tracked in the object tracker.
+                */
+               if (timer->entry.next == NULL &&
+                   timer->entry.prev == TIMER_ENTRY_STATIC) {
+                       debug_object_init(timer, &timer_debug_descr);
+                       debug_object_activate(timer, &timer_debug_descr);
+                       return 0;
+               }
+
+               /* Neither tracked nor static: defuse it with the stub. */
+               setup_timer(timer, stub_timer, 0);
+               return 1;
+
+       case ODEBUG_STATE_ACTIVE:
+               /* Activating an already active timer: warn, no fixup. */
+               WARN_ON(1);
+               return 0;
+
+       default:
+               return 0;
+       }
+}
+
+/*
+ * fixup_free is called when:
+ * - an active object is freed
+ *
+ * The still active timer is deleted synchronously and dropped from the
+ * object tracker.  Returns 1 if that was done, 0 otherwise.
+ */
+static int timer_fixup_free(void *addr, enum debug_obj_state state)
+{
+       struct timer_list *timer = addr;
+
+       if (state != ODEBUG_STATE_ACTIVE)
+               return 0;
+
+       del_timer_sync(timer);
+       debug_object_free(timer, &timer_debug_descr);
+       return 1;
+}
+
+/*
+ * fixup_assert_init is called when:
+ * - an untracked/uninit-ed object is found
+ *
+ * Returns 1 if the stub callback had to be installed, 0 otherwise.
+ */
+static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
+{
+       struct timer_list *timer = addr;
+
+       if (state != ODEBUG_STATE_NOTAVAILABLE)
+               return 0;
+
+       if (timer->entry.prev != TIMER_ENTRY_STATIC) {
+               /* Never initialized at all: defuse it with the stub. */
+               setup_timer(timer, stub_timer, 0);
+               return 1;
+       }
+
+       /*
+        * Not really a fixup: a statically initialized timer only needs
+        * to be made known to the object tracker.
+        */
+       debug_object_init(timer, &timer_debug_descr);
+       return 0;
+}
+
+static struct debug_obj_descr timer_debug_descr = {
+       .name                   = "timer_list",
+       .debug_hint             = timer_debug_hint,     /* yields ->function */
+       .fixup_init             = timer_fixup_init,     /* init while active */
+       .fixup_activate         = timer_fixup_activate, /* bad activation */
+       .fixup_free             = timer_fixup_free,     /* free while active */
+       .fixup_assert_init      = timer_fixup_assert_init, /* untracked use */
+};
+
+static inline void debug_timer_init(struct timer_list *timer)
+{
+       debug_object_init(timer, &timer_debug_descr);   /* track as timer */
+}
+
+static inline void debug_timer_activate(struct timer_list *timer)
+{
+       debug_object_activate(timer, &timer_debug_descr);
+}
+
+static inline void debug_timer_deactivate(struct timer_list *timer)
+{
+       debug_object_deactivate(timer, &timer_debug_descr);
+}
+
+static inline void debug_timer_free(struct timer_list *timer)
+{
+       debug_object_free(timer, &timer_debug_descr);   /* stop tracking */
+}
+
+static inline void debug_timer_assert_init(struct timer_list *timer)
+{
+       debug_object_assert_init(timer, &timer_debug_descr);
+}
+
+static void do_init_timer(struct timer_list *timer, unsigned int flags,
+                         const char *name, struct lock_class_key *key);
+
+void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
+                            const char *name, struct lock_class_key *key)
+{
+       debug_object_init_on_stack(timer, &timer_debug_descr);
+       do_init_timer(timer, flags, name, key); /* common field setup */
+}
+EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
+
+void destroy_timer_on_stack(struct timer_list *timer)
+{
+       debug_object_free(timer, &timer_debug_descr);   /* stop tracking */
+}
+EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
+
+#else /* !CONFIG_DEBUG_OBJECTS_TIMERS: the debug hooks compile away */
+static inline void debug_timer_init(struct timer_list *timer) { }
+static inline void debug_timer_activate(struct timer_list *timer) { }
+static inline void debug_timer_deactivate(struct timer_list *timer) { }
+static inline void debug_timer_assert_init(struct timer_list *timer) { }
+#endif /* CONFIG_DEBUG_OBJECTS_TIMERS */
+
+static inline void debug_init(struct timer_list *timer)
+{
+       debug_timer_init(timer);        /* object tracking hook */
+       trace_timer_init(timer);        /* tracepoint */
+}
+
+static inline void
+debug_activate(struct timer_list *timer, unsigned long expires)
+{
+       debug_timer_activate(timer);    /* object tracking hook */
+       trace_timer_start(timer, expires);      /* tracepoint */
+}
+
+static inline void debug_deactivate(struct timer_list *timer)
+{
+       debug_timer_deactivate(timer);  /* object tracking hook */
+       trace_timer_cancel(timer);      /* tracepoint */
+}
+
+static inline void debug_assert_init(struct timer_list *timer)
+{
+       debug_timer_assert_init(timer); /* object tracking hook only */
+}
+
+static void do_init_timer(struct timer_list *timer, unsigned int flags,
+                         const char *name, struct lock_class_key *key)
+{
+       struct tvec_base *base = __raw_get_cpu_var(tvec_bases);
+
+       timer->entry.next = NULL;
+       timer->base = (void *)((unsigned long)base | flags);
+       timer->slack = -1;
+#ifdef CONFIG_TIMER_STATS
+       timer->start_site = NULL;
+       timer->start_pid = -1;
+       memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
+       lockdep_init_map(&timer->lockdep_map, name, key, 0);
+}
+
+/**
+ * init_timer_key - initialize a timer
+ * @timer: the timer to be initialized
+ * @flags: timer flags
+ * @name: name of the timer
+ * @key: lockdep class key of the fake lock used for tracking timer
+ *       sync lock dependencies
+ *
+ * init_timer_key() must be done to a timer prior to calling *any* of the
+ * other timer functions.
+ */
+void init_timer_key(struct timer_list *timer, unsigned int flags,
+                   const char *name, struct lock_class_key *key)
+{
+       debug_init(timer);
+       do_init_timer(timer, flags, name, key);
+}
+EXPORT_SYMBOL(init_timer_key);
+
+/*
+ * Unlink @timer from the list it is queued on.  With @clear_pending the
+ * ->next pointer is reset to NULL as well; ->prev is always poisoned.
+ */
+static inline void detach_timer(struct timer_list *timer, bool clear_pending)
+{
+       struct list_head *node = &timer->entry;
+
+       debug_deactivate(timer);
+
+       __list_del(node->prev, node->next);
+       if (clear_pending)
+               node->next = NULL;
+       node->prev = LIST_POISON2;
+}
+
+/*
+ * Take @timer off @base, clear its pending state and update the
+ * counters of the base accordingly.
+ */
+static inline void
+detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
+{
+       detach_timer(timer, true);
+
+       base->all_timers--;
+       if (!tbase_get_deferrable(timer->base))
+               base->active_timers--;
+
+       (void)catchup_timer_jiffies(base);
+}
+
+/*
+ * Dequeue @timer from @base if it is pending and update the counters of
+ * the base.  Returns 1 if the timer was pending, 0 if there was nothing
+ * to do.
+ */
+static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
+                            bool clear_pending)
+{
+       if (timer_pending(timer)) {
+               detach_timer(timer, clear_pending);
+
+               if (!tbase_get_deferrable(timer->base)) {
+                       base->active_timers--;
+                       /* the cached next expiry may just have gone away */
+                       if (timer->expires == base->next_timer)
+                               base->next_timer = base->timer_jiffies;
+               }
+               base->all_timers--;
+               (void)catchup_timer_jiffies(base);
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * We are using hashed locking: holding per_cpu(tvec_bases).lock
+ * means that all timers which are tied to this base via timer->base are
+ * locked, and the base itself is locked too.
+ *
+ * So __run_timers/migrate_timers can safely modify all timers which could
+ * be found on ->tvX lists.
+ *
+ * When the timer's base is locked, and the timer removed from list, it is
+ * possible to set timer->base = NULL and drop the lock: the timer remains
+ * locked.
+ *
+ * lock_timer_base() returns the timer's base with base->lock held and the
+ * previous interrupt state saved in *flags.  It retries until the base it
+ * locked is still the timer's base; while the base pointer is NULL it
+ * simply spins.
+ */
+static struct tvec_base *lock_timer_base(struct timer_list *timer,
+                                       unsigned long *flags)
+       __acquires(timer->base->lock)
+{
+       struct tvec_base *base;
+
+       for (;;) {
+               struct tvec_base *prelock_base = timer->base;
+               base = tbase_get_base(prelock_base);    /* strip flag bits */
+               if (likely(base != NULL)) {
+                       spin_lock_irqsave(&base->lock, *flags);
+                       if (likely(prelock_base == timer->base))
+                               return base;
+                       /* The timer has migrated to another CPU */
+                       spin_unlock_irqrestore(&base->lock, *flags);
+               }
+               cpu_relax();
+       }
+}
+
+/*
+ * Common worker for the mod_timer*() variants: (re)queue @timer with the
+ * new @expires.  With @pending_only a timer that is not pending is left
+ * alone.  @pinned is handed to get_nohz_timer_target() to choose the CPU
+ * whose base receives the timer.
+ *
+ * Returns 1 if the timer was pending before the call, 0 otherwise.
+ */
+static inline int
+__mod_timer(struct timer_list *timer, unsigned long expires,
+           bool pending_only, int pinned)
+{
+       struct tvec_base *base, *target;
+       unsigned long flags;
+       int was_pending;
+       int cpu;
+
+       timer_stats_timer_set_start_info(timer);
+       BUG_ON(!timer->function);
+
+       base = lock_timer_base(timer, &flags);
+
+       was_pending = detach_if_pending(timer, base, false);
+       if (pending_only && !was_pending)
+               goto unlock;
+
+       debug_activate(timer, expires);
+
+       cpu = get_nohz_timer_target(pinned);
+       target = per_cpu(tvec_bases, cpu);
+
+       /*
+        * Move the timer to the target base, unless its handler is
+        * running right now: changing the base of a running timer would
+        * keep del_timer_sync() from noticing that the handler has not
+        * finished yet.  This also guarantees that the timer is
+        * serialized wrt itself.
+        */
+       if (target != base && likely(base->running_timer != timer)) {
+               /* See the comment in lock_timer_base() */
+               timer_set_base(timer, NULL);
+               spin_unlock(&base->lock);
+               base = target;
+               spin_lock(&base->lock);
+               timer_set_base(timer, base);
+       }
+
+       timer->expires = expires;
+       internal_add_timer(base, timer);
+
+unlock:
+       spin_unlock_irqrestore(&base->lock, flags);
+
+       return was_pending;
+}
+
+/**
+ * mod_timer_pending - modify a pending timer's timeout
+ * @timer: the pending timer to be modified
+ * @expires: new timeout in jiffies
+ *
+ * mod_timer_pending() is the same for pending timers as mod_timer(),
+ * but will not re-activate and modify already deleted timers.
+ *
+ * It is useful for unserialized use of timers.
+ *
+ * Returns 1 if a pending timer was modified, 0 if the timer was not
+ * pending and has been left untouched.
+ */
+int mod_timer_pending(struct timer_list *timer, unsigned long expires)
+{
+       return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
+}
+EXPORT_SYMBOL(mod_timer_pending);
+
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * Algorithm:
+ *   1) calculate the maximum (absolute) time
+ *   2) calculate the highest bit where the expires and new max are different
+ *   3) use this bit to make a mask
+ *   4) use the bitmask to round down the maximum time, so that all last
+ *      bits are zeros
+ *
+ * Returns the expiry to use: @expires itself when no slack applies,
+ * otherwise a value between @expires and @expires plus the slack.
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+       unsigned long expires_limit, mask;
+       int bit;
+
+       if (timer->slack >= 0) {
+               expires_limit = expires + timer->slack;
+       } else {
+               /* Default slack: 1/256th of the remaining delay. */
+               long delta = expires - jiffies;
+
+               if (delta < 256)
+                       return expires;
+
+               expires_limit = expires + delta / 256;
+       }
+       mask = expires ^ expires_limit;
+       if (mask == 0)
+               return expires;
+
+       /*
+        * mask is a single non-zero word, so the highest differing bit
+        * can be taken directly with __fls() rather than through the
+        * generic bitmap search.
+        */
+       bit = __fls(mask);
+
+       mask = (1UL << bit) - 1;
+
+       expires_limit = expires_limit & ~(mask);
+
+       return expires_limit;
+}
+
+/**
+ * mod_timer - modify a timer's timeout
+ * @timer: the timer to be modified
+ * @expires: new timeout in jiffies
+ *
+ * mod_timer() is a more efficient way to update the expire field of an
+ * active timer (if the timer is inactive it will be activated).  The
+ * timer's slack is applied to @expires before it is used.
+ *
+ * mod_timer(timer, expires) is equivalent to:
+ *
+ *     del_timer(timer); timer->expires = expires; add_timer(timer);
+ *
+ * With multiple unserialized concurrent users of the same timer,
+ * mod_timer() is the only safe way to modify the timeout, since
+ * add_timer() cannot modify an already running timer.
+ *
+ * The function returns whether it has modified a pending timer or not.
+ * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
+ * active timer returns 1.)
+ */
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+       expires = apply_slack(timer, expires);
+
+       /*
+        * Common optimization triggered by the networking code: a
+        * pending timer that is re-modified to the very same expiry
+        * needs no work at all.
+        */
+       if (!timer_pending(timer) || timer->expires != expires)
+               return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
+
+       return 1;
+}
+EXPORT_SYMBOL(mod_timer);
+
+/**
+ * mod_timer_pinned - modify a timer's timeout
+ * @timer: the timer to be modified
+ * @expires: new timeout in jiffies
+ *
+ * mod_timer_pinned() updates the expire field of an active timer (an
+ * inactive timer will be activated) and ensures that the timer is
+ * scheduled on the current CPU.
+ *
+ * This does not prevent the timer from being migrated when the current
+ * CPU goes offline.  If that is a problem, use CPU-hotplug notifiers to
+ * handle it correctly, for example by cancelling the timer when the
+ * corresponding CPU goes offline.
+ *
+ * mod_timer_pinned(timer, expires) is equivalent to:
+ *
+ *     del_timer(timer); timer->expires = expires; add_timer(timer);
+ */
+int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
+{
+       bool unchanged = timer->expires == expires && timer_pending(timer);
+
+       /* Already pending with exactly this expiry: nothing to do. */
+       if (unchanged)
+               return 1;
+
+       return __mod_timer(timer, expires, false, TIMER_PINNED);
+}
+EXPORT_SYMBOL(mod_timer_pinned);
+
+/**
+ * add_timer - start a timer
+ * @timer: the timer to be added
+ *
+ * The kernel will do a ->function(->data) callback from the
+ * timer interrupt at the ->expires point in the future. The
+ * current time is 'jiffies'.
+ *
+ * The timer's ->expires, ->function (and if the handler uses it, ->data)
+ * fields must be set prior to calling this function.  Adding a timer
+ * that is already pending is a bug (BUG_ON).
+ *
+ * Timers with an ->expires field in the past will be executed in the next
+ * timer tick.
+ */
+void add_timer(struct timer_list *timer)
+{
+       BUG_ON(timer_pending(timer));
+       mod_timer(timer, timer->expires);
+}
+EXPORT_SYMBOL(add_timer);
+
+/**
+ * add_timer_on - start a timer on a particular CPU
+ * @timer: the timer to be added
+ * @cpu: the CPU to start it on
+ *
+ * This is not very scalable on SMP. Double adds are not possible.
+ *
+ * The timer must not be pending and must have ->function set (both are
+ * enforced with BUG_ON); ->expires is used as set by the caller.
+ */
+void add_timer_on(struct timer_list *timer, int cpu)
+{
+       struct tvec_base *base = per_cpu(tvec_bases, cpu);
+       unsigned long flags;
+
+       timer_stats_timer_set_start_info(timer);
+       BUG_ON(timer_pending(timer) || !timer->function);
+       spin_lock_irqsave(&base->lock, flags);
+       timer_set_base(timer, base);
+       debug_activate(timer, timer->expires);
+       internal_add_timer(base, timer);
+       /*
+        * Check whether the other CPU is in dynticks mode and needs
+        * to be triggered to reevaluate the timer wheel.
+        * We are protected against the other CPU fiddling
+        * with the timer by holding the timer base lock. This also
+        * makes sure that a CPU on the way to stop its tick can not
+        * evaluate the timer wheel.
+        *
+        * Spare the IPI for deferrable timers on idle targets though.
+        * The next busy ticks will take care of it. Except full dynticks
+        * require special care against races with idle_cpu(), lets deal
+        * with that later.
+        */
+       if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(cpu))
+               wake_up_nohz_cpu(cpu);
+
+       spin_unlock_irqrestore(&base->lock, flags);
+}
+EXPORT_SYMBOL_GPL(add_timer_on);
+
+/**
+ * del_timer - deactivate a timer.
+ * @timer: the timer to be deactivated
+ *
+ * del_timer() deactivates a timer - this works on both active and inactive
+ * timers.
+ *
+ * The function returns whether it has deactivated a pending timer or not.
+ * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
+ * active timer returns 1.)
+ *
+ * timer_pending() is tested without the base lock as a fast path; the
+ * timer is only detached, under the base lock, if it looked pending.
+ */
+int del_timer(struct timer_list *timer)
+{
+       struct tvec_base *base;
+       unsigned long flags;
+       int ret = 0;
+
+       debug_assert_init(timer);
+
+       timer_stats_timer_clear_start_info(timer);
+       if (timer_pending(timer)) {
+               base = lock_timer_base(timer, &flags);
+               ret = detach_if_pending(timer, base, true);
+               spin_unlock_irqrestore(&base->lock, flags);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(del_timer);
+
+/**
+ * try_to_del_timer_sync - Try to deactivate a timer
+ * @timer: timer to delete
+ *
+ * This function tries to deactivate a timer. Upon successful (ret >= 0)
+ * exit the timer is not queued and the handler is not running on any CPU.
+ *
+ * Returns -1 without detaching anything when base->running_timer is
+ * @timer, i.e. its handler is currently being run; otherwise returns the
+ * result of detach_if_pending().
+ */
+int try_to_del_timer_sync(struct timer_list *timer)
+{
+       struct tvec_base *base;
+       unsigned long flags;
+       int ret = -1;
+
+       debug_assert_init(timer);
+
+       base = lock_timer_base(timer, &flags);
+
+       if (base->running_timer != timer) {
+               timer_stats_timer_clear_start_info(timer);
+               ret = detach_if_pending(timer, base, true);
+       }
+       spin_unlock_irqrestore(&base->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(try_to_del_timer_sync);
+
+#ifdef CONFIG_SMP
+/**
+ * del_timer_sync - deactivate a timer and wait for the handler to finish.
+ * @timer: the timer to be deactivated
+ *
+ * This function only differs from del_timer() on SMP: besides deactivating
+ * the timer it also makes sure the handler has finished executing on other
+ * CPUs.
+ *
+ * Synchronization rules: Callers must prevent restarting of the timer,
+ * otherwise this function is meaningless. It must not be called from
+ * interrupt contexts unless the timer is an irqsafe one. The caller must
+ * not hold locks which would prevent completion of the timer's
+ * handler. The timer's handler must not call add_timer_on(). Upon exit the
+ * timer is not queued and the handler is not running on any CPU.
+ *
+ * Note: For !irqsafe timers, you must not hold locks that are held in
+ *   interrupt context while calling this function. Even if the lock has
+ *   nothing to do with the timer in question.  Here's why:
+ *
+ *    CPU0                             CPU1
+ *    ----                             ----
+ *                                   <SOFTIRQ>
+ *                                   call_timer_fn();
+ *                                     base->running_timer = mytimer;
+ *  spin_lock_irq(somelock);
+ *                                     <IRQ>
+ *                                        spin_lock(somelock);
+ *  del_timer_sync(mytimer);
+ *   while (base->running_timer == mytimer);
+ *
+ * Now del_timer_sync() will never return and never release somelock.
+ * The interrupt on the other CPU is waiting to grab somelock but
+ * it has interrupted the softirq that CPU0 is waiting to finish.
+ *
+ * The function returns whether it has deactivated a pending timer or not.
+ * Internally it retries try_to_del_timer_sync() until that call reports a
+ * non-negative result.
+ */
+int del_timer_sync(struct timer_list *timer)
+{
+#ifdef CONFIG_LOCKDEP
+       unsigned long flags;
+
+       /*
+        * If lockdep gives a backtrace here, please reference
+        * the synchronization rules above.
+        */
+       local_irq_save(flags);
+       lock_map_acquire(&timer->lockdep_map);
+       lock_map_release(&timer->lockdep_map);
+       local_irq_restore(flags);
+#endif
+       /*
+        * Don't use it in hardirq context unless the timer is irqsafe,
+        * because it could lead to deadlock.
+        */
+       WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
+       for (;;) {
+               int ret = try_to_del_timer_sync(timer);
+               if (ret >= 0)
+                       return ret;
+               cpu_relax();    /* handler still running: spin and retry */
+       }
+}
+EXPORT_SYMBOL(del_timer_sync);
+#endif
+
+static int cascade(struct tvec_base *base, struct tvec *tv, int index)
+{
+       /*
+        * Cascade all the timers from tv up one level: the list in slot
+        * @index of @tv is emptied and every timer on it is re-added to
+        * @base through __internal_add_timer(). Returns @index, so a zero
+        * result tells the caller in __run_timers() to go on with the
+        * next vector.
+        */
+       struct timer_list *timer, *tmp;
+       struct list_head tv_list;
+
+       list_replace_init(tv->vec + index, &tv_list);
+
+       /*
+        * We are removing _all_ timers from the list, so we
+        * don't have to detach them individually.
+        */
+       list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
+               BUG_ON(tbase_get_base(timer->base) != base);
+               /* No accounting, while moving them */
+               __internal_add_timer(base, timer);
+       }
+
+       return index;
+}
+
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+                         unsigned long data)
+{
+       int count = preempt_count();    /* snapshot to detect a leak by fn() */
+
+#ifdef CONFIG_LOCKDEP
+       /*
+        * It is permissible to free the timer from inside the
+        * function that is called from it; this needs to be taken into
+        * account for lockdep too. To avoid bogus "held lock freed"
+        * warnings as well as problems when looking into
+        * timer->lockdep_map, make a copy and use that here.
+        */
+       struct lockdep_map lockdep_map;
+
+       lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
+#endif
+       /*
+        * Couple the lock chain with the lock chain at
+        * del_timer_sync() by acquiring the lock_map around the fn()
+        * call here and in del_timer_sync().
+        */
+       lock_map_acquire(&lockdep_map);
+
+       trace_timer_expire_entry(timer);
+       fn(data);
+       trace_timer_expire_exit(timer);
+
+       lock_map_release(&lockdep_map);
+
+       if (count != preempt_count()) {
+               WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+                         fn, count, preempt_count());
+               /*
+                * Restore the preempt count. That gives us a decent
+                * chance to survive and extract information. If the
+                * callback kept a lock held, bad luck, but not worse
+                * than the BUG() we had.
+                */
+               preempt_count_set(count);
+       }
+}
+
+#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
+
+/**
+ * __run_timers - run all expired timers (if any) on this CPU.
+ * @base: the timer vector to be processed.
+ *
+ * This function cascades all vectors and executes all expired timer
+ * vectors.
+ *
+ * It advances base->timer_jiffies one jiffy at a time until it has passed
+ * the current jiffies value. The base lock is dropped around every
+ * callback: for irqsafe timers only the lock is released, for all other
+ * timers interrupts are enabled as well while the callback runs.
+ */
+static inline void __run_timers(struct tvec_base *base)
+{
+       struct timer_list *timer;
+
+       spin_lock_irq(&base->lock);
+       if (catchup_timer_jiffies(base)) {
+               spin_unlock_irq(&base->lock);
+               return;
+       }
+       while (time_after_eq(jiffies, base->timer_jiffies)) {
+               struct list_head work_list;
+               struct list_head *head = &work_list;
+               int index = base->timer_jiffies & TVR_MASK;
+
+               /*
+                * Cascade timers: when the tv1 index is 0, cascade the
+                * INDEX(0) slot of tv2, and continue with tv3, tv4 and
+                * tv5 for as long as the slot index just cascaded was 0
+                * as well.
+                */
+               if (!index &&
+                       (!cascade(base, &base->tv2, INDEX(0))) &&
+                               (!cascade(base, &base->tv3, INDEX(1))) &&
+                                       !cascade(base, &base->tv4, INDEX(2)))
+                       cascade(base, &base->tv5, INDEX(3));
+               ++base->timer_jiffies;
+               list_replace_init(base->tv1.vec + index, head);
+               while (!list_empty(head)) {
+                       void (*fn)(unsigned long);
+                       unsigned long data;
+                       bool irqsafe;
+
+                       timer = list_first_entry(head, struct timer_list,entry);
+                       fn = timer->function;
+                       data = timer->data;
+                       irqsafe = tbase_get_irqsafe(timer->base);
+
+                       timer_stats_account_timer(timer);
+
+                       base->running_timer = timer;
+                       detach_expired_timer(timer, base);
+
+                       if (irqsafe) {
+                               spin_unlock(&base->lock);
+                               call_timer_fn(timer, fn, data);
+                               spin_lock(&base->lock);
+                       } else {
+                               spin_unlock_irq(&base->lock);
+                               call_timer_fn(timer, fn, data);
+                               spin_lock_irq(&base->lock);
+                       }
+               }
+       }
+       base->running_timer = NULL;
+       spin_unlock_irq(&base->lock);
+}
+
+#ifdef CONFIG_NO_HZ_COMMON
+/*
+ * Find out when the next timer event is due to happen. This
+ * is used on S/390 to stop all activity when a CPU is idle.
+ * This function needs to be called with interrupts disabled.
+ *
+ * Deferrable timers are skipped. If no other timer is found in tv1-tv5,
+ * base->timer_jiffies + NEXT_TIMER_MAX_DELTA is returned.
+ */
+static unsigned long __next_timer_interrupt(struct tvec_base *base)
+{
+       unsigned long timer_jiffies = base->timer_jiffies;
+       unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
+       int index, slot, array, found = 0;
+       struct timer_list *nte;
+       struct tvec *varray[4];
+
+       /* Look for timer events in tv1. */
+       index = slot = timer_jiffies & TVR_MASK;
+       do {
+               list_for_each_entry(nte, base->tv1.vec + slot, entry) {
+                       if (tbase_get_deferrable(nte->base))
+                               continue;
+
+                       found = 1;
+                       expires = nte->expires;
+                       /* Look at the cascade bucket(s)? */
+                       if (!index || slot < index)
+                               goto cascade;
+                       return expires;
+               }
+               slot = (slot + 1) & TVR_MASK;
+       } while (slot != index);
+
+cascade:
+       /* Calculate the next cascade event */
+       if (index)
+               timer_jiffies += TVR_SIZE - index;
+       timer_jiffies >>= TVR_BITS;
+
+       /* Check tv2-tv5. */
+       varray[0] = &base->tv2;
+       varray[1] = &base->tv3;
+       varray[2] = &base->tv4;
+       varray[3] = &base->tv5;
+
+       for (array = 0; array < 4; array++) {
+               struct tvec *varp = varray[array];
+
+               index = slot = timer_jiffies & TVN_MASK;
+               do {
+                       list_for_each_entry(nte, varp->vec + slot, entry) {
+                               if (tbase_get_deferrable(nte->base))
+                                       continue;
+
+                               found = 1;
+                               if (time_before(nte->expires, expires))
+                                       expires = nte->expires;
+                       }
+                       /*
+                        * Do we still search for the first timer or are
+                        * we looking up the cascade buckets ?
+                        */
+                       if (found) {
+                               /* Look at the cascade bucket(s)? */
+                               if (!index || slot < index)
+                                       break;
+                               return expires;
+                       }
+                       slot = (slot + 1) & TVN_MASK;
+               } while (slot != index);
+
+               if (index)
+                       timer_jiffies += TVN_SIZE - index;
+               timer_jiffies >>= TVN_BITS;
+       }
+       return expires;
+}
+
+/*
+ * Check whether the next hrtimer event is before the next timer wheel
+ * event.
+ *
+ * Returns @expires unchanged when hrtimer_get_next_event() reports
+ * KTIME_MAX, @now + 1 when an hrtimer has already expired, and otherwise
+ * the earlier of @expires and @now plus the hrtimer delta in jiffies.
+ */
+static unsigned long cmp_next_hrtimer_event(unsigned long now,
+                                           unsigned long expires)
+{
+       ktime_t hr_delta = hrtimer_get_next_event();
+       struct timespec tsdelta;
+       unsigned long delta;
+
+       if (hr_delta.tv64 == KTIME_MAX)
+               return expires;
+
+       /*
+        * Expired timer available, let it expire in the next tick
+        */
+       if (hr_delta.tv64 <= 0)
+               return now + 1;
+
+       tsdelta = ktime_to_timespec(hr_delta);
+       delta = timespec_to_jiffies(&tsdelta);
+
+       /*
+        * Limit the delta to the max value, which is checked in
+        * tick_nohz_stop_sched_tick():
+        */
+       if (delta > NEXT_TIMER_MAX_DELTA)
+               delta = NEXT_TIMER_MAX_DELTA;
+
+       /*
+        * Take rounding errors into account and make sure that it
+        * expires in the next tick. Otherwise we go into an endless
+        * ping pong due to tick_nohz_stop_sched_tick() retriggering
+        * the timer softirq.
+        */
+       if (delta < 1)
+               delta = 1;
+       now += delta;
+       if (time_before(now, expires))
+               return now;
+       return expires;
+}
+
+/**
+ * get_next_timer_interrupt - return the jiffy of the next pending timer
+ * @now: current time (in jiffies)
+ *
+ * Returns @now if a timer wheel timer of this CPU is already due. Otherwise
+ * returns the result of cmp_next_hrtimer_event() for the next wheel expiry,
+ * which defaults to @now + NEXT_TIMER_MAX_DELTA when the base has no active
+ * timers. For an offline CPU @now + NEXT_TIMER_MAX_DELTA is returned
+ * directly.
+ */
+unsigned long get_next_timer_interrupt(unsigned long now)
+{
+       struct tvec_base *base = __this_cpu_read(tvec_bases);
+       unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
+
+       /*
+        * Pretend that there is no timer pending if the cpu is offline.
+        * Possible pending timers will be migrated later to an active cpu.
+        */
+       if (cpu_is_offline(smp_processor_id()))
+               return expires;
+
+       spin_lock(&base->lock);
+       if (base->active_timers) {
+               /* Recompute the cached value only if it is not in the future. */
+               if (time_before_eq(base->next_timer, base->timer_jiffies))
+                       base->next_timer = __next_timer_interrupt(base);
+               expires = base->next_timer;
+       }
+       spin_unlock(&base->lock);
+
+       if (time_before_eq(expires, now))
+               return now;
+
+       return cmp_next_hrtimer_event(now, expires);
+}
+#endif
+
+/*
+ * Called from the timer interrupt handler to charge one tick to the current
+ * process.  user_tick is 1 if the tick is user time, 0 for system.
+ *
+ * Besides the accounting it also runs the local timers, the RCU callback
+ * check, pending irq work (CONFIG_IRQ_WORK, only when in_irq()), the
+ * scheduler tick and the POSIX CPU timers of the current task, in that
+ * order.
+ */
+void update_process_times(int user_tick)
+{
+       struct task_struct *p = current;
+       int cpu = smp_processor_id();
+
+       /* Note: this timer irq context must be accounted for as well. */
+       account_process_tick(p, user_tick);
+       run_local_timers();
+       rcu_check_callbacks(cpu, user_tick);
+#ifdef CONFIG_IRQ_WORK
+       if (in_irq())
+               irq_work_run();
+#endif
+       scheduler_tick();
+       run_posix_cpu_timers(p);
+}
+
+/*
+ * This function runs timers and the timer-tq in bottom half context.
+ *
+ * hrtimer_run_pending() is called first; the timer wheel of this CPU is
+ * then processed only if jiffies has reached base->timer_jiffies. The
+ * softirq_action argument is not used.
+ */
+static void run_timer_softirq(struct softirq_action *h)
+{
+       struct tvec_base *base = __this_cpu_read(tvec_bases);
+
+       hrtimer_run_pending();
+
+       if (time_after_eq(jiffies, base->timer_jiffies))
+               __run_timers(base);
+}
+
+/*
+ * Called by the local, per-CPU timer interrupt on SMP.
+ *
+ * Runs the hrtimer queues directly and raises TIMER_SOFTIRQ, whose handler
+ * (run_timer_softirq(), registered in init_timers()) processes the timer
+ * wheel.
+ */
+void run_local_timers(void)
+{
+       hrtimer_run_queues();
+       raise_softirq(TIMER_SOFTIRQ);
+}
+
+#ifdef __ARCH_WANT_SYS_ALARM
+
+/*
+ * For backwards compatibility?  This can be done in libc so Alpha
+ * and all newer ports shouldn't need it.
+ *
+ * Only built when the architecture defines __ARCH_WANT_SYS_ALARM; the
+ * syscall simply forwards @seconds to alarm_setitimer() and returns its
+ * result.
+ */
+SYSCALL_DEFINE1(alarm, unsigned int, seconds)
+{
+       return alarm_setitimer(seconds);
+}
+
+#endif
+
+static void process_timeout(unsigned long __data)
+{
+       wake_up_process((struct task_struct *)__data);  /* __data is the task to wake */
+}
+
+/**
+ * schedule_timeout - sleep until timeout
+ * @timeout: timeout value in jiffies
+ *
+ * Make the current task sleep until @timeout jiffies have
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
+ * pass before the routine returns. The routine will return 0.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task. In this case the remaining time
+ * in jiffies will be returned, or 0 if the timer expired in time.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
+ * the CPU away without a bound on the timeout. In this case the return
+ * value will be %MAX_SCHEDULE_TIMEOUT.
+ *
+ * A negative @timeout is treated as a caller bug: it is reported with
+ * printk() and dump_stack(), the task state is set to TASK_RUNNING and 0
+ * is returned.
+ *
+ * In all cases the return value is guaranteed to be non-negative.
+ */
+signed long __sched schedule_timeout(signed long timeout)
+{
+       struct timer_list timer;
+       unsigned long expire;
+
+       switch (timeout)
+       {
+       case MAX_SCHEDULE_TIMEOUT:
+               /*
+                * These two special cases are useful to be comfortable
+                * in the caller. Nothing more. We could take
+                * MAX_SCHEDULE_TIMEOUT from one of the negative values
+                * but I'd like to return a valid offset (>=0) to allow
+                * the caller to do everything it wants with the retval.
+                */
+               schedule();
+               goto out;
+       default:
+               /*
+                * Another bit of PARANOID. Note that the retval will be
+                * 0 since no piece of kernel is supposed to do a check
+                * for a negative retval of schedule_timeout() (since it
+                * should never happen anyway). You just have the printk()
+                * that will tell you if something has gone wrong and where.
+                */
+               if (timeout < 0) {
+                       printk(KERN_ERR "schedule_timeout: wrong timeout "
+                               "value %lx\n", timeout);
+                       dump_stack();
+                       current->state = TASK_RUNNING;
+                       goto out;
+               }
+       }
+
+       expire = timeout + jiffies;
+
+       setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
+       __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
+       schedule();
+       del_singleshot_timer_sync(&timer);
+
+       /* Remove the timer from the object tracker */
+       destroy_timer_on_stack(&timer);
+
+       timeout = expire - jiffies;     /* jiffies left; negative if past expiry */
+
+ out:
+       return timeout < 0 ? 0 : timeout;
+}
+EXPORT_SYMBOL(schedule_timeout);
+
+/*
+ * We can use __set_current_state() here because schedule_timeout() calls
+ * schedule() unconditionally.
+ *
+ * Sets the task state to TASK_INTERRUPTIBLE and returns the result of
+ * schedule_timeout(@timeout).
+ */
+signed long __sched schedule_timeout_interruptible(signed long timeout)
+{
+       __set_current_state(TASK_INTERRUPTIBLE);
+       return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_interruptible);
+
+signed long __sched schedule_timeout_killable(signed long timeout)
+{
+       __set_current_state(TASK_KILLABLE);     /* state for the sleep below */
+       return schedule_timeout(timeout);       /* result is passed through */
+}
+EXPORT_SYMBOL(schedule_timeout_killable);
+
+signed long __sched schedule_timeout_uninterruptible(signed long timeout)
+{
+       __set_current_state(TASK_UNINTERRUPTIBLE);      /* state for the sleep below */
+       return schedule_timeout(timeout);       /* result is passed through */
+}
+EXPORT_SYMBOL(schedule_timeout_uninterruptible);
+
+static int init_timers_cpu(int cpu)
+{
+       int j;
+       struct tvec_base *base;
+       static char tvec_base_done[NR_CPUS];    /* per CPU: base already set up? */
+
+       if (!tvec_base_done[cpu]) {
+               static char boot_done;  /* set by the first call (boot CPU) */
+
+               if (boot_done) {
+                       /*
+                        * The APs use this path later in boot: their base
+                        * is allocated on the CPU's memory node. -ENOMEM
+                        * is returned if the allocation fails or the
+                        * pointer overlaps the timer flag bits.
+                        */
+                       base = kzalloc_node(sizeof(*base), GFP_KERNEL,
+                                           cpu_to_node(cpu));
+                       if (!base)
+                               return -ENOMEM;
+
+                       /* Make sure tvec_base has TIMER_FLAG_MASK bits free */
+                       if (WARN_ON(base != tbase_get_base(base))) {
+                               kfree(base);
+                               return -ENOMEM;
+                       }
+                       per_cpu(tvec_bases, cpu) = base;
+               } else {
+                       /*
+                        * This is for the boot CPU - we use compile-time
+                        * static initialisation because per-cpu memory isn't
+                        * ready yet and because the memory allocators are not
+                        * initialised either.
+                        */
+                       boot_done = 1;
+                       base = &boot_tvec_bases;
+               }
+               spin_lock_init(&base->lock);
+               tvec_base_done[cpu] = 1;
+       } else {
+               base = per_cpu(tvec_bases, cpu);
+       }
+
+       /* (Re)initialise all wheel lists and the bookkeeping of this base. */
+       for (j = 0; j < TVN_SIZE; j++) {
+               INIT_LIST_HEAD(base->tv5.vec + j);
+               INIT_LIST_HEAD(base->tv4.vec + j);
+               INIT_LIST_HEAD(base->tv3.vec + j);
+               INIT_LIST_HEAD(base->tv2.vec + j);
+       }
+       for (j = 0; j < TVR_SIZE; j++)
+               INIT_LIST_HEAD(base->tv1.vec + j);
+
+       base->timer_jiffies = jiffies;
+       base->next_timer = base->timer_jiffies;
+       base->active_timers = 0;
+       base->all_timers = 0;
+       return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
+{
+       struct timer_list *timer;
+
+       while (!list_empty(head)) {
+               timer = list_first_entry(head, struct timer_list, entry);
+               /*
+                * Move the timer from the list at @head to @new_base.
+                * We ignore the accounting on the dying cpu.
+                */
+               detach_timer(timer, false);
+               timer_set_base(timer, new_base);
+               internal_add_timer(new_base, timer);
+       }
+}
+
+static void migrate_timers(int cpu)
+{
+       struct tvec_base *old_base;
+       struct tvec_base *new_base;
+       int i;
+
+       BUG_ON(cpu_online(cpu));        /* @cpu must already be offline */
+       old_base = per_cpu(tvec_bases, cpu);
+       new_base = get_cpu_var(tvec_bases);
+       /*
+        * Move every timer of @cpu's base to the base of the CPU we are
+        * running on, with both base locks held.
+        *
+        * The caller is globally serialized and nobody else
+        * takes two locks at once, deadlock is not possible.
+        */
+       spin_lock_irq(&new_base->lock);
+       spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+
+       BUG_ON(old_base->running_timer);
+
+       for (i = 0; i < TVR_SIZE; i++)
+               migrate_timer_list(new_base, old_base->tv1.vec + i);
+       for (i = 0; i < TVN_SIZE; i++) {
+               migrate_timer_list(new_base, old_base->tv2.vec + i);
+               migrate_timer_list(new_base, old_base->tv3.vec + i);
+               migrate_timer_list(new_base, old_base->tv4.vec + i);
+               migrate_timer_list(new_base, old_base->tv5.vec + i);
+       }
+
+       spin_unlock(&old_base->lock);
+       spin_unlock_irq(&new_base->lock);
+       put_cpu_var(tvec_bases);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int timer_cpu_notify(struct notifier_block *self,
+                               unsigned long action, void *hcpu)
+{
+       long cpu = (long)hcpu;  /* the CPU number is carried in the pointer value */
+       int err;
+
+       switch(action) {
+       case CPU_UP_PREPARE:
+       case CPU_UP_PREPARE_FROZEN:
+               err = init_timers_cpu(cpu);     /* set up or reset the CPU's timer base */
+               if (err < 0)
+                       return notifier_from_errno(err);
+               break;
+#ifdef CONFIG_HOTPLUG_CPU
+       case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
+               migrate_timers(cpu);    /* take over the dead CPU's timers */
+               break;
+#endif
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block timers_nb = {
+       .notifier_call  = timer_cpu_notify,     /* registered in init_timers() */
+};
+
+/*
+ * Boot-time setup of the timer wheel: initialise the timer base of the
+ * current CPU by calling the notifier directly, then register the CPU
+ * notifier and install run_timer_softirq() as the TIMER_SOFTIRQ handler.
+ */
+void __init init_timers(void)
+{
+       int err;
+
+       /* ensure there are enough low bits for flags in timer->base pointer */
+       BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
+
+       err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
+                              (void *)(long)smp_processor_id());
+       BUG_ON(err != NOTIFY_OK);
+
+       init_timer_stats();
+       register_cpu_notifier(&timers_nb);
+       open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
+}
+
+/**
+ * msleep - sleep safely even with waitqueue interruptions
+ * @msecs: Time in milliseconds to sleep for
+ *
+ * The timeout is @msecs converted to jiffies plus one extra jiffy. The
+ * sleep is restarted with the remaining time for as long as
+ * schedule_timeout_uninterruptible() returns a non-zero remainder.
+ */
+void msleep(unsigned int msecs)
+{
+       unsigned long timeout = msecs_to_jiffies(msecs) + 1;
+
+       while (timeout)
+               timeout = schedule_timeout_uninterruptible(timeout);
+}
+
+EXPORT_SYMBOL(msleep);
+
+/**
+ * msleep_interruptible - sleep waiting for signals
+ * @msecs: Time in milliseconds to sleep for
+ *
+ * Sleeps like msleep(), but in interruptible state, and stops as soon as a
+ * signal is pending for the current task. Returns the time that was left,
+ * converted from jiffies to milliseconds (0 if the full time elapsed).
+ */
+unsigned long msleep_interruptible(unsigned int msecs)
+{
+       unsigned long timeout = msecs_to_jiffies(msecs) + 1;
+
+       while (timeout && !signal_pending(current))
+               timeout = schedule_timeout_interruptible(timeout);
+       return jiffies_to_msecs(timeout);
+}
+
+EXPORT_SYMBOL(msleep_interruptible);
+
+static int __sched do_usleep_range(unsigned long min, unsigned long max)
+{
+       ktime_t kmin;
+       unsigned long delta;
+       /*
+        * NOTE(review): both usec->nsec products below are computed in
+        * unsigned long and presumably can wrap for large inputs where
+        * long is 32 bits wide - confirm against callers.
+        */
+       kmin = ktime_set(0, min * NSEC_PER_USEC);       /* lower bound, in nsecs */
+       delta = (max - min) * NSEC_PER_USEC;    /* allowed slack, in nsecs */
+       return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+}
+
+/**
+ * usleep_range - Drop in replacement for udelay where wakeup is flexible
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ *
+ * Puts the current task into TASK_UNINTERRUPTIBLE state and sleeps via
+ * do_usleep_range(); the return value of that sleep is ignored.
+ */
+void usleep_range(unsigned long min, unsigned long max)
+{
+       __set_current_state(TASK_UNINTERRUPTIBLE);
+       do_usleep_range(min, max);
+}
+EXPORT_SYMBOL(usleep_range);
diff --git a/kernel/timeconst.bc b/kernel/timeconst.bc
deleted file mode 100644 (file)
index 511bdf2..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-scale=0
-
-define gcd(a,b) {
-       auto t;
-       while (b) {
-               t = b;
-               b = a % b;
-               a = t;
-       }
-       return a;
-}
-
-/* Division by reciprocal multiplication. */
-define fmul(b,n,d) {
-       return (2^b*n+d-1)/d;
-}
-
-/* Adjustment factor when a ceiling value is used.  Use as:
-   (imul * n) + (fmulxx * n + fadjxx) >> xx) */
-define fadj(b,n,d) {
-       auto v;
-       d = d/gcd(n,d);
-       v = 2^b*(d-1)/d;
-       return v;
-}
-
-/* Compute the appropriate mul/adj values as well as a shift count,
-   which brings the mul value into the range 2^b-1 <= x < 2^b.  Such
-   a shift value will be correct in the signed integer range and off
-   by at most one in the upper half of the unsigned range. */
-define fmuls(b,n,d) {
-       auto s, m;
-       for (s = 0; 1; s++) {
-               m = fmul(s,n,d);
-               if (m >= 2^(b-1))
-                       return s;
-       }
-       return 0;
-}
-
-define timeconst(hz) {
-       print "/* Automatically generated by kernel/timeconst.bc */\n"
-       print "/* Time conversion constants for HZ == ", hz, " */\n"
-       print "\n"
-
-       print "#ifndef KERNEL_TIMECONST_H\n"
-       print "#define KERNEL_TIMECONST_H\n\n"
-
-       print "#include <linux/param.h>\n"
-       print "#include <linux/types.h>\n\n"
-
-       print "#if HZ != ", hz, "\n"
-       print "#error \qkernel/timeconst.h has the wrong HZ value!\q\n"
-       print "#endif\n\n"
-
-       if (hz < 2) {
-               print "#error Totally bogus HZ value!\n"
-       } else {
-               s=fmuls(32,1000,hz)
-               obase=16
-               print "#define HZ_TO_MSEC_MUL32\tU64_C(0x", fmul(s,1000,hz), ")\n"
-               print "#define HZ_TO_MSEC_ADJ32\tU64_C(0x", fadj(s,1000,hz), ")\n"
-               obase=10
-               print "#define HZ_TO_MSEC_SHR32\t", s, "\n"
-
-               s=fmuls(32,hz,1000)
-               obase=16
-               print "#define MSEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000), ")\n"
-               print "#define MSEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000), ")\n"
-               obase=10
-               print "#define MSEC_TO_HZ_SHR32\t", s, "\n"
-
-               obase=10
-               cd=gcd(hz,1000)
-               print "#define HZ_TO_MSEC_NUM\t\t", 1000/cd, "\n"
-               print "#define HZ_TO_MSEC_DEN\t\t", hz/cd, "\n"
-               print "#define MSEC_TO_HZ_NUM\t\t", hz/cd, "\n"
-               print "#define MSEC_TO_HZ_DEN\t\t", 1000/cd, "\n"
-               print "\n"
-
-               s=fmuls(32,1000000,hz)
-               obase=16
-               print "#define HZ_TO_USEC_MUL32\tU64_C(0x", fmul(s,1000000,hz), ")\n"
-               print "#define HZ_TO_USEC_ADJ32\tU64_C(0x", fadj(s,1000000,hz), ")\n"
-               obase=10
-               print "#define HZ_TO_USEC_SHR32\t", s, "\n"
-
-               s=fmuls(32,hz,1000000)
-               obase=16
-               print "#define USEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000000), ")\n"
-               print "#define USEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000000), ")\n"
-               obase=10
-               print "#define USEC_TO_HZ_SHR32\t", s, "\n"
-
-               obase=10
-               cd=gcd(hz,1000000)
-               print "#define HZ_TO_USEC_NUM\t\t", 1000000/cd, "\n"
-               print "#define HZ_TO_USEC_DEN\t\t", hz/cd, "\n"
-               print "#define USEC_TO_HZ_NUM\t\t", hz/cd, "\n"
-               print "#define USEC_TO_HZ_DEN\t\t", 1000000/cd, "\n"
-               print "\n"
-
-               print "#endif /* KERNEL_TIMECONST_H */\n"
-       }
-       halt
-}
-
-timeconst(hz)
diff --git a/kernel/timer.c b/kernel/timer.c
deleted file mode 100644 (file)
index 3bb01a3..0000000
+++ /dev/null
@@ -1,1734 +0,0 @@
-/*
- *  linux/kernel/timer.c
- *
- *  Kernel internal timers
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
- *
- *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
- *              "A Kernel Model for Precision Timekeeping" by Dave Mills
- *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
- *              serialize accesses to xtime/lost_ticks).
- *                              Copyright (C) 1998  Andrea Arcangeli
- *  1999-03-10  Improved NTP compatibility by Ulrich Windl
- *  2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
- *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
- *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
- *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
- */
-
-#include <linux/kernel_stat.h>
-#include <linux/export.h>
-#include <linux/interrupt.h>
-#include <linux/percpu.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/pid_namespace.h>
-#include <linux/notifier.h>
-#include <linux/thread_info.h>
-#include <linux/time.h>
-#include <linux/jiffies.h>
-#include <linux/posix-timers.h>
-#include <linux/cpu.h>
-#include <linux/syscalls.h>
-#include <linux/delay.h>
-#include <linux/tick.h>
-#include <linux/kallsyms.h>
-#include <linux/irq_work.h>
-#include <linux/sched.h>
-#include <linux/sched/sysctl.h>
-#include <linux/slab.h>
-#include <linux/compat.h>
-
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
-#include <asm/div64.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/timer.h>
-
-__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
-
-/*
- * per-CPU timer vector definitions:
- */
-#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
-#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
-#define TVN_SIZE (1 << TVN_BITS)
-#define TVR_SIZE (1 << TVR_BITS)
-#define TVN_MASK (TVN_SIZE - 1)
-#define TVR_MASK (TVR_SIZE - 1)
-#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
-
-struct tvec {
-       struct list_head vec[TVN_SIZE];
-};
-
-struct tvec_root {
-       struct list_head vec[TVR_SIZE];
-};
-
-struct tvec_base {
-       spinlock_t lock;
-       struct timer_list *running_timer;
-       unsigned long timer_jiffies;
-       unsigned long next_timer;
-       unsigned long active_timers;
-       unsigned long all_timers;
-       struct tvec_root tv1;
-       struct tvec tv2;
-       struct tvec tv3;
-       struct tvec tv4;
-       struct tvec tv5;
-} ____cacheline_aligned;
-
-struct tvec_base boot_tvec_bases;
-EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
-
-/* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
-{
-       return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
-}
-
-static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
-{
-       return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
-}
-
-static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
-{
-       return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
-}
-
-static inline void
-timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
-{
-       unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
-
-       timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
-}
-
-static unsigned long round_jiffies_common(unsigned long j, int cpu,
-               bool force_up)
-{
-       int rem;
-       unsigned long original = j;
-
-       /*
-        * We don't want all cpus firing their timers at once hitting the
-        * same lock or cachelines, so we skew each extra cpu with an extra
-        * 3 jiffies. This 3 jiffies came originally from the mm/ code which
-        * already did this.
-        * The skew is done by adding 3*cpunr, then round, then subtract this
-        * extra offset again.
-        */
-       j += cpu * 3;
-
-       rem = j % HZ;
-
-       /*
-        * If the target jiffie is just after a whole second (which can happen
-        * due to delays of the timer irq, long irq off times etc etc) then
-        * we should round down to the whole second, not up. Use 1/4th second
-        * as cutoff for this rounding as an extreme upper bound for this.
-        * But never round down if @force_up is set.
-        */
-       if (rem < HZ/4 && !force_up) /* round down */
-               j = j - rem;
-       else /* round up */
-               j = j - rem + HZ;
-
-       /* now that we have rounded, subtract the extra skew again */
-       j -= cpu * 3;
-
-       /*
-        * Make sure j is still in the future. Otherwise return the
-        * unmodified value.
-        */
-       return time_is_after_jiffies(j) ? j : original;
-}
-
-/**
- * __round_jiffies - function to round jiffies to a full second
- * @j: the time in (absolute) jiffies that should be rounded
- * @cpu: the processor number on which the timeout will happen
- *
- * __round_jiffies() rounds an absolute time in the future (in jiffies)
- * up or down to (approximately) full seconds. This is useful for timers
- * for which the exact time they fire does not matter too much, as long as
- * they fire approximately every X seconds.
- *
- * By rounding these timers to whole seconds, all such timers will fire
- * at the same time, rather than at various times spread out. The goal
- * of this is to have the CPU wake up less, which saves power.
- *
- * The exact rounding is skewed for each processor to avoid all
- * processors firing at the exact same time, which could lead
- * to lock contention or spurious cache line bouncing.
- *
- * The return value is the rounded version of the @j parameter.
- */
-unsigned long __round_jiffies(unsigned long j, int cpu)
-{
-       return round_jiffies_common(j, cpu, false);
-}
-EXPORT_SYMBOL_GPL(__round_jiffies);
-
-/**
- * __round_jiffies_relative - function to round jiffies to a full second
- * @j: the time in (relative) jiffies that should be rounded
- * @cpu: the processor number on which the timeout will happen
- *
- * __round_jiffies_relative() rounds a time delta  in the future (in jiffies)
- * up or down to (approximately) full seconds. This is useful for timers
- * for which the exact time they fire does not matter too much, as long as
- * they fire approximately every X seconds.
- *
- * By rounding these timers to whole seconds, all such timers will fire
- * at the same time, rather than at various times spread out. The goal
- * of this is to have the CPU wake up less, which saves power.
- *
- * The exact rounding is skewed for each processor to avoid all
- * processors firing at the exact same time, which could lead
- * to lock contention or spurious cache line bouncing.
- *
- * The return value is the rounded version of the @j parameter.
- */
-unsigned long __round_jiffies_relative(unsigned long j, int cpu)
-{
-       unsigned long j0 = jiffies;
-
-       /* Use j0 because jiffies might change while we run */
-       return round_jiffies_common(j + j0, cpu, false) - j0;
-}
-EXPORT_SYMBOL_GPL(__round_jiffies_relative);
-
-/**
- * round_jiffies - function to round jiffies to a full second
- * @j: the time in (absolute) jiffies that should be rounded
- *
- * round_jiffies() rounds an absolute time in the future (in jiffies)
- * up or down to (approximately) full seconds. This is useful for timers
- * for which the exact time they fire does not matter too much, as long as
- * they fire approximately every X seconds.
- *
- * By rounding these timers to whole seconds, all such timers will fire
- * at the same time, rather than at various times spread out. The goal
- * of this is to have the CPU wake up less, which saves power.
- *
- * The return value is the rounded version of the @j parameter.
- */
-unsigned long round_jiffies(unsigned long j)
-{
-       return round_jiffies_common(j, raw_smp_processor_id(), false);
-}
-EXPORT_SYMBOL_GPL(round_jiffies);
-
-/**
- * round_jiffies_relative - function to round jiffies to a full second
- * @j: the time in (relative) jiffies that should be rounded
- *
- * round_jiffies_relative() rounds a time delta  in the future (in jiffies)
- * up or down to (approximately) full seconds. This is useful for timers
- * for which the exact time they fire does not matter too much, as long as
- * they fire approximately every X seconds.
- *
- * By rounding these timers to whole seconds, all such timers will fire
- * at the same time, rather than at various times spread out. The goal
- * of this is to have the CPU wake up less, which saves power.
- *
- * The return value is the rounded version of the @j parameter.
- */
-unsigned long round_jiffies_relative(unsigned long j)
-{
-       return __round_jiffies_relative(j, raw_smp_processor_id());
-}
-EXPORT_SYMBOL_GPL(round_jiffies_relative);
-
-/**
- * __round_jiffies_up - function to round jiffies up to a full second
- * @j: the time in (absolute) jiffies that should be rounded
- * @cpu: the processor number on which the timeout will happen
- *
- * This is the same as __round_jiffies() except that it will never
- * round down.  This is useful for timeouts for which the exact time
- * of firing does not matter too much, as long as they don't fire too
- * early.
- */
-unsigned long __round_jiffies_up(unsigned long j, int cpu)
-{
-       return round_jiffies_common(j, cpu, true);
-}
-EXPORT_SYMBOL_GPL(__round_jiffies_up);
-
-/**
- * __round_jiffies_up_relative - function to round jiffies up to a full second
- * @j: the time in (relative) jiffies that should be rounded
- * @cpu: the processor number on which the timeout will happen
- *
- * This is the same as __round_jiffies_relative() except that it will never
- * round down.  This is useful for timeouts for which the exact time
- * of firing does not matter too much, as long as they don't fire too
- * early.
- */
-unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
-{
-       unsigned long j0 = jiffies;
-
-       /* Use j0 because jiffies might change while we run */
-       return round_jiffies_common(j + j0, cpu, true) - j0;
-}
-EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
-
-/**
- * round_jiffies_up - function to round jiffies up to a full second
- * @j: the time in (absolute) jiffies that should be rounded
- *
- * This is the same as round_jiffies() except that it will never
- * round down.  This is useful for timeouts for which the exact time
- * of firing does not matter too much, as long as they don't fire too
- * early.
- */
-unsigned long round_jiffies_up(unsigned long j)
-{
-       return round_jiffies_common(j, raw_smp_processor_id(), true);
-}
-EXPORT_SYMBOL_GPL(round_jiffies_up);
-
-/**
- * round_jiffies_up_relative - function to round jiffies up to a full second
- * @j: the time in (relative) jiffies that should be rounded
- *
- * This is the same as round_jiffies_relative() except that it will never
- * round down.  This is useful for timeouts for which the exact time
- * of firing does not matter too much, as long as they don't fire too
- * early.
- */
-unsigned long round_jiffies_up_relative(unsigned long j)
-{
-       return __round_jiffies_up_relative(j, raw_smp_processor_id());
-}
-EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
-
-/**
- * set_timer_slack - set the allowed slack for a timer
- * @timer: the timer to be modified
- * @slack_hz: the amount of time (in jiffies) allowed for rounding
- *
- * Set the amount of time, in jiffies, that a certain timer has
- * in terms of slack. By setting this value, the timer subsystem
- * will schedule the actual timer somewhere between
- * the time mod_timer() asks for, and that time plus the slack.
- *
- * By setting the slack to -1, a percentage of the delay is used
- * instead.
- */
-void set_timer_slack(struct timer_list *timer, int slack_hz)
-{
-       timer->slack = slack_hz;
-}
-EXPORT_SYMBOL_GPL(set_timer_slack);
-
-/*
- * If the list is empty, catch up ->timer_jiffies to the current time.
- * The caller must hold the tvec_base lock.  Returns true if the list
- * was empty and therefore ->timer_jiffies was updated.
- */
-static bool catchup_timer_jiffies(struct tvec_base *base)
-{
-       if (!base->all_timers) {
-               base->timer_jiffies = jiffies;
-               return true;
-       }
-       return false;
-}
-
-static void
-__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
-{
-       unsigned long expires = timer->expires;
-       unsigned long idx = expires - base->timer_jiffies;
-       struct list_head *vec;
-
-       if (idx < TVR_SIZE) {
-               int i = expires & TVR_MASK;
-               vec = base->tv1.vec + i;
-       } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
-               int i = (expires >> TVR_BITS) & TVN_MASK;
-               vec = base->tv2.vec + i;
-       } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
-               int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
-               vec = base->tv3.vec + i;
-       } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
-               int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
-               vec = base->tv4.vec + i;
-       } else if ((signed long) idx < 0) {
-               /*
-                * Can happen if you add a timer with expires == jiffies,
-                * or you set a timer to go off in the past
-                */
-               vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
-       } else {
-               int i;
-               /* If the timeout is larger than MAX_TVAL (on 64-bit
-                * architectures or with CONFIG_BASE_SMALL=1) then we
-                * use the maximum timeout.
-                */
-               if (idx > MAX_TVAL) {
-                       idx = MAX_TVAL;
-                       expires = idx + base->timer_jiffies;
-               }
-               i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
-               vec = base->tv5.vec + i;
-       }
-       /*
-        * Timers are FIFO:
-        */
-       list_add_tail(&timer->entry, vec);
-}
-
-static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
-{
-       (void)catchup_timer_jiffies(base);
-       __internal_add_timer(base, timer);
-       /*
-        * Update base->active_timers and base->next_timer
-        */
-       if (!tbase_get_deferrable(timer->base)) {
-               if (!base->active_timers++ ||
-                   time_before(timer->expires, base->next_timer))
-                       base->next_timer = timer->expires;
-       }
-       base->all_timers++;
-}
-
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
-{
-       if (timer->start_site)
-               return;
-
-       timer->start_site = addr;
-       memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
-       timer->start_pid = current->pid;
-}
-
-static void timer_stats_account_timer(struct timer_list *timer)
-{
-       unsigned int flag = 0;
-
-       if (likely(!timer->start_site))
-               return;
-       if (unlikely(tbase_get_deferrable(timer->base)))
-               flag |= TIMER_STATS_FLAG_DEFERRABLE;
-
-       timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
-                                timer->function, timer->start_comm, flag);
-}
-
-#else
-static void timer_stats_account_timer(struct timer_list *timer) {}
-#endif
-
-#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-
-static struct debug_obj_descr timer_debug_descr;
-
-static void *timer_debug_hint(void *addr)
-{
-       return ((struct timer_list *) addr)->function;
-}
-
-/*
- * fixup_init is called when:
- * - an active object is initialized
- */
-static int timer_fixup_init(void *addr, enum debug_obj_state state)
-{
-       struct timer_list *timer = addr;
-
-       switch (state) {
-       case ODEBUG_STATE_ACTIVE:
-               del_timer_sync(timer);
-               debug_object_init(timer, &timer_debug_descr);
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-/* Stub timer callback for improperly used timers. */
-static void stub_timer(unsigned long data)
-{
-       WARN_ON(1);
-}
-
-/*
- * fixup_activate is called when:
- * - an active object is activated
- * - an unknown object is activated (might be a statically initialized object)
- */
-static int timer_fixup_activate(void *addr, enum debug_obj_state state)
-{
-       struct timer_list *timer = addr;
-
-       switch (state) {
-
-       case ODEBUG_STATE_NOTAVAILABLE:
-               /*
-                * This is not really a fixup. The timer was
-                * statically initialized. We just make sure that it
-                * is tracked in the object tracker.
-                */
-               if (timer->entry.next == NULL &&
-                   timer->entry.prev == TIMER_ENTRY_STATIC) {
-                       debug_object_init(timer, &timer_debug_descr);
-                       debug_object_activate(timer, &timer_debug_descr);
-                       return 0;
-               } else {
-                       setup_timer(timer, stub_timer, 0);
-                       return 1;
-               }
-               return 0;
-
-       case ODEBUG_STATE_ACTIVE:
-               WARN_ON(1);
-
-       default:
-               return 0;
-       }
-}
-
-/*
- * fixup_free is called when:
- * - an active object is freed
- */
-static int timer_fixup_free(void *addr, enum debug_obj_state state)
-{
-       struct timer_list *timer = addr;
-
-       switch (state) {
-       case ODEBUG_STATE_ACTIVE:
-               del_timer_sync(timer);
-               debug_object_free(timer, &timer_debug_descr);
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-/*
- * fixup_assert_init is called when:
- * - an untracked/uninit-ed object is found
- */
-static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
-{
-       struct timer_list *timer = addr;
-
-       switch (state) {
-       case ODEBUG_STATE_NOTAVAILABLE:
-               if (timer->entry.prev == TIMER_ENTRY_STATIC) {
-                       /*
-                        * This is not really a fixup. The timer was
-                        * statically initialized. We just make sure that it
-                        * is tracked in the object tracker.
-                        */
-                       debug_object_init(timer, &timer_debug_descr);
-                       return 0;
-               } else {
-                       setup_timer(timer, stub_timer, 0);
-                       return 1;
-               }
-       default:
-               return 0;
-       }
-}
-
-static struct debug_obj_descr timer_debug_descr = {
-       .name                   = "timer_list",
-       .debug_hint             = timer_debug_hint,
-       .fixup_init             = timer_fixup_init,
-       .fixup_activate         = timer_fixup_activate,
-       .fixup_free             = timer_fixup_free,
-       .fixup_assert_init      = timer_fixup_assert_init,
-};
-
-static inline void debug_timer_init(struct timer_list *timer)
-{
-       debug_object_init(timer, &timer_debug_descr);
-}
-
-static inline void debug_timer_activate(struct timer_list *timer)
-{
-       debug_object_activate(timer, &timer_debug_descr);
-}
-
-static inline void debug_timer_deactivate(struct timer_list *timer)
-{
-       debug_object_deactivate(timer, &timer_debug_descr);
-}
-
-static inline void debug_timer_free(struct timer_list *timer)
-{
-       debug_object_free(timer, &timer_debug_descr);
-}
-
-static inline void debug_timer_assert_init(struct timer_list *timer)
-{
-       debug_object_assert_init(timer, &timer_debug_descr);
-}
-
-static void do_init_timer(struct timer_list *timer, unsigned int flags,
-                         const char *name, struct lock_class_key *key);
-
-void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
-                            const char *name, struct lock_class_key *key)
-{
-       debug_object_init_on_stack(timer, &timer_debug_descr);
-       do_init_timer(timer, flags, name, key);
-}
-EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
-
-void destroy_timer_on_stack(struct timer_list *timer)
-{
-       debug_object_free(timer, &timer_debug_descr);
-}
-EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
-
-#else
-static inline void debug_timer_init(struct timer_list *timer) { }
-static inline void debug_timer_activate(struct timer_list *timer) { }
-static inline void debug_timer_deactivate(struct timer_list *timer) { }
-static inline void debug_timer_assert_init(struct timer_list *timer) { }
-#endif
-
-static inline void debug_init(struct timer_list *timer)
-{
-       debug_timer_init(timer);
-       trace_timer_init(timer);
-}
-
-static inline void
-debug_activate(struct timer_list *timer, unsigned long expires)
-{
-       debug_timer_activate(timer);
-       trace_timer_start(timer, expires);
-}
-
-static inline void debug_deactivate(struct timer_list *timer)
-{
-       debug_timer_deactivate(timer);
-       trace_timer_cancel(timer);
-}
-
-static inline void debug_assert_init(struct timer_list *timer)
-{
-       debug_timer_assert_init(timer);
-}
-
-static void do_init_timer(struct timer_list *timer, unsigned int flags,
-                         const char *name, struct lock_class_key *key)
-{
-       struct tvec_base *base = __raw_get_cpu_var(tvec_bases);
-
-       timer->entry.next = NULL;
-       timer->base = (void *)((unsigned long)base | flags);
-       timer->slack = -1;
-#ifdef CONFIG_TIMER_STATS
-       timer->start_site = NULL;
-       timer->start_pid = -1;
-       memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
-       lockdep_init_map(&timer->lockdep_map, name, key, 0);
-}
-
-/**
- * init_timer_key - initialize a timer
- * @timer: the timer to be initialized
- * @flags: timer flags
- * @name: name of the timer
- * @key: lockdep class key of the fake lock used for tracking timer
- *       sync lock dependencies
- *
- * init_timer_key() must be done to a timer prior calling *any* of the
- * other timer functions.
- */
-void init_timer_key(struct timer_list *timer, unsigned int flags,
-                   const char *name, struct lock_class_key *key)
-{
-       debug_init(timer);
-       do_init_timer(timer, flags, name, key);
-}
-EXPORT_SYMBOL(init_timer_key);
-
-static inline void detach_timer(struct timer_list *timer, bool clear_pending)
-{
-       struct list_head *entry = &timer->entry;
-
-       debug_deactivate(timer);
-
-       __list_del(entry->prev, entry->next);
-       if (clear_pending)
-               entry->next = NULL;
-       entry->prev = LIST_POISON2;
-}
-
-static inline void
-detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
-{
-       detach_timer(timer, true);
-       if (!tbase_get_deferrable(timer->base))
-               base->active_timers--;
-       base->all_timers--;
-       (void)catchup_timer_jiffies(base);
-}
-
-static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
-                            bool clear_pending)
-{
-       if (!timer_pending(timer))
-               return 0;
-
-       detach_timer(timer, clear_pending);
-       if (!tbase_get_deferrable(timer->base)) {
-               base->active_timers--;
-               if (timer->expires == base->next_timer)
-                       base->next_timer = base->timer_jiffies;
-       }
-       base->all_timers--;
-       (void)catchup_timer_jiffies(base);
-       return 1;
-}
-
-/*
- * We are using hashed locking: holding per_cpu(tvec_bases).lock
- * means that all timers which are tied to this base via timer->base are
- * locked, and the base itself is locked too.
- *
- * So __run_timers/migrate_timers can safely modify all timers which could
- * be found on ->tvX lists.
- *
- * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
- */
-static struct tvec_base *lock_timer_base(struct timer_list *timer,
-                                       unsigned long *flags)
-       __acquires(timer->base->lock)
-{
-       struct tvec_base *base;
-
-       for (;;) {
-               struct tvec_base *prelock_base = timer->base;
-               base = tbase_get_base(prelock_base);
-               if (likely(base != NULL)) {
-                       spin_lock_irqsave(&base->lock, *flags);
-                       if (likely(prelock_base == timer->base))
-                               return base;
-                       /* The timer has migrated to another CPU */
-                       spin_unlock_irqrestore(&base->lock, *flags);
-               }
-               cpu_relax();
-       }
-}
-
-static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires,
-                                               bool pending_only, int pinned)
-{
-       struct tvec_base *base, *new_base;
-       unsigned long flags;
-       int ret = 0 , cpu;
-
-       timer_stats_timer_set_start_info(timer);
-       BUG_ON(!timer->function);
-
-       base = lock_timer_base(timer, &flags);
-
-       ret = detach_if_pending(timer, base, false);
-       if (!ret && pending_only)
-               goto out_unlock;
-
-       debug_activate(timer, expires);
-
-       cpu = get_nohz_timer_target(pinned);
-       new_base = per_cpu(tvec_bases, cpu);
-
-       if (base != new_base) {
-               /*
-                * We are trying to schedule the timer on the local CPU.
-                * However we can't change timer's base while it is running,
-                * otherwise del_timer_sync() can't detect that the timer's
-                * handler yet has not finished. This also guarantees that
-                * the timer is serialized wrt itself.
-                */
-               if (likely(base->running_timer != timer)) {
-                       /* See the comment in lock_timer_base() */
-                       timer_set_base(timer, NULL);
-                       spin_unlock(&base->lock);
-                       base = new_base;
-                       spin_lock(&base->lock);
-                       timer_set_base(timer, base);
-               }
-       }
-
-       timer->expires = expires;
-       internal_add_timer(base, timer);
-
-out_unlock:
-       spin_unlock_irqrestore(&base->lock, flags);
-
-       return ret;
-}
-
-/**
- * mod_timer_pending - modify a pending timer's timeout
- * @timer: the pending timer to be modified
- * @expires: new timeout in jiffies
- *
- * mod_timer_pending() is the same for pending timers as mod_timer(),
- * but will not re-activate and modify already deleted timers.
- *
- * It is useful for unserialized use of timers.
- */
-int mod_timer_pending(struct timer_list *timer, unsigned long expires)
-{
-       return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
-}
-EXPORT_SYMBOL(mod_timer_pending);
-
-/*
- * Decide where to put the timer while taking the slack into account
- *
- * Algorithm:
- *   1) calculate the maximum (absolute) time
- *   2) calculate the highest bit where the expires and new max are different
- *   3) use this bit to make a mask
- *   4) use the bitmask to round down the maximum time, so that all last
- *      bits are zeros
- */
-static inline
-unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
-{
-       unsigned long expires_limit, mask;
-       int bit;
-
-       if (timer->slack >= 0) {
-               expires_limit = expires + timer->slack;
-       } else {
-               long delta = expires - jiffies;
-
-               if (delta < 256)
-                       return expires;
-
-               expires_limit = expires + delta / 256;
-       }
-       mask = expires ^ expires_limit;
-       if (mask == 0)
-               return expires;
-
-       bit = find_last_bit(&mask, BITS_PER_LONG);
-
-       mask = (1UL << bit) - 1;
-
-       expires_limit = expires_limit & ~(mask);
-
-       return expires_limit;
-}
-
-/**
- * mod_timer - modify a timer's timeout
- * @timer: the timer to be modified
- * @expires: new timeout in jiffies
- *
- * mod_timer() is a more efficient way to update the expire field of an
- * active timer (if the timer is inactive it will be activated)
- *
- * mod_timer(timer, expires) is equivalent to:
- *
- *     del_timer(timer); timer->expires = expires; add_timer(timer);
- *
- * Note that if there are multiple unserialized concurrent users of the
- * same timer, then mod_timer() is the only safe way to modify the timeout,
- * since add_timer() cannot modify an already running timer.
- *
- * The function returns whether it has modified a pending timer or not.
- * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
- * active timer returns 1.)
- */
-int mod_timer(struct timer_list *timer, unsigned long expires)
-{
-       expires = apply_slack(timer, expires);
-
-       /*
-        * This is a common optimization triggered by the
-        * networking code - if the timer is re-modified
-        * to be the same thing then just return:
-        */
-       if (timer_pending(timer) && timer->expires == expires)
-               return 1;
-
-       return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
-}
-EXPORT_SYMBOL(mod_timer);
-
-/**
- * mod_timer_pinned - modify a timer's timeout
- * @timer: the timer to be modified
- * @expires: new timeout in jiffies
- *
- * mod_timer_pinned() is a way to update the expire field of an
- * active timer (if the timer is inactive it will be activated)
- * and to ensure that the timer is scheduled on the current CPU.
- *
- * Note that this does not prevent the timer from being migrated
- * when the current CPU goes offline.  If this is a problem for
- * you, use CPU-hotplug notifiers to handle it correctly, for
- * example, cancelling the timer when the corresponding CPU goes
- * offline.
- *
- * mod_timer_pinned(timer, expires) is equivalent to:
- *
- *     del_timer(timer); timer->expires = expires; add_timer(timer);
- */
-int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
-{
-       if (timer->expires == expires && timer_pending(timer))
-               return 1;
-
-       return __mod_timer(timer, expires, false, TIMER_PINNED);
-}
-EXPORT_SYMBOL(mod_timer_pinned);
-
-/**
- * add_timer - start a timer
- * @timer: the timer to be added
- *
- * The kernel will do a ->function(->data) callback from the
- * timer interrupt at the ->expires point in the future. The
- * current time is 'jiffies'.
- *
- * The timer's ->expires, ->function (and if the handler uses it, ->data)
- * fields must be set prior calling this function.
- *
- * Timers with an ->expires field in the past will be executed in the next
- * timer tick.
- */
-void add_timer(struct timer_list *timer)
-{
-       BUG_ON(timer_pending(timer));
-       mod_timer(timer, timer->expires);
-}
-EXPORT_SYMBOL(add_timer);
-
-/**
- * add_timer_on - start a timer on a particular CPU
- * @timer: the timer to be added
- * @cpu: the CPU to start it on
- *
- * This is not very scalable on SMP. Double adds are not possible.
- */
-void add_timer_on(struct timer_list *timer, int cpu)
-{
-       struct tvec_base *base = per_cpu(tvec_bases, cpu);
-       unsigned long flags;
-
-       timer_stats_timer_set_start_info(timer);
-       BUG_ON(timer_pending(timer) || !timer->function);
-       spin_lock_irqsave(&base->lock, flags);
-       timer_set_base(timer, base);
-       debug_activate(timer, timer->expires);
-       internal_add_timer(base, timer);
-       /*
-        * Check whether the other CPU is in dynticks mode and needs
-        * to be triggered to reevaluate the timer wheel.
-        * We are protected against the other CPU fiddling
-        * with the timer by holding the timer base lock. This also
-        * makes sure that a CPU on the way to stop its tick can not
-        * evaluate the timer wheel.
-        *
-        * Spare the IPI for deferrable timers on idle targets though.
-        * The next busy ticks will take care of it. Except full dynticks
-        * require special care against races with idle_cpu(), lets deal
-        * with that later.
-        */
-       if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(cpu))
-               wake_up_nohz_cpu(cpu);
-
-       spin_unlock_irqrestore(&base->lock, flags);
-}
-EXPORT_SYMBOL_GPL(add_timer_on);
-
-/**
- * del_timer - deactive a timer.
- * @timer: the timer to be deactivated
- *
- * del_timer() deactivates a timer - this works on both active and inactive
- * timers.
- *
- * The function returns whether it has deactivated a pending timer or not.
- * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
- * active timer returns 1.)
- */
-int del_timer(struct timer_list *timer)
-{
-       struct tvec_base *base;
-       unsigned long flags;
-       int ret = 0;
-
-       debug_assert_init(timer);
-
-       timer_stats_timer_clear_start_info(timer);
-       if (timer_pending(timer)) {
-               base = lock_timer_base(timer, &flags);
-               ret = detach_if_pending(timer, base, true);
-               spin_unlock_irqrestore(&base->lock, flags);
-       }
-
-       return ret;
-}
-EXPORT_SYMBOL(del_timer);
-
-/**
- * try_to_del_timer_sync - Try to deactivate a timer
- * @timer: timer do del
- *
- * This function tries to deactivate a timer. Upon successful (ret >= 0)
- * exit the timer is not queued and the handler is not running on any CPU.
- */
-int try_to_del_timer_sync(struct timer_list *timer)
-{
-       struct tvec_base *base;
-       unsigned long flags;
-       int ret = -1;
-
-       debug_assert_init(timer);
-
-       base = lock_timer_base(timer, &flags);
-
-       if (base->running_timer != timer) {
-               timer_stats_timer_clear_start_info(timer);
-               ret = detach_if_pending(timer, base, true);
-       }
-       spin_unlock_irqrestore(&base->lock, flags);
-
-       return ret;
-}
-EXPORT_SYMBOL(try_to_del_timer_sync);
-
-#ifdef CONFIG_SMP
-/**
- * del_timer_sync - deactivate a timer and wait for the handler to finish.
- * @timer: the timer to be deactivated
- *
- * This function only differs from del_timer() on SMP: besides deactivating
- * the timer it also makes sure the handler has finished executing on other
- * CPUs.
- *
- * Synchronization rules: Callers must prevent restarting of the timer,
- * otherwise this function is meaningless. It must not be called from
- * interrupt contexts unless the timer is an irqsafe one. The caller must
- * not hold locks which would prevent completion of the timer's
- * handler. The timer's handler must not call add_timer_on(). Upon exit the
- * timer is not queued and the handler is not running on any CPU.
- *
- * Note: For !irqsafe timers, you must not hold locks that are held in
- *   interrupt context while calling this function. Even if the lock has
- *   nothing to do with the timer in question.  Here's why:
- *
- *    CPU0                             CPU1
- *    ----                             ----
- *                                   <SOFTIRQ>
- *                                   call_timer_fn();
- *                                     base->running_timer = mytimer;
- *  spin_lock_irq(somelock);
- *                                     <IRQ>
- *                                        spin_lock(somelock);
- *  del_timer_sync(mytimer);
- *   while (base->running_timer == mytimer);
- *
- * Now del_timer_sync() will never return and never release somelock.
- * The interrupt on the other CPU is waiting to grab somelock but
- * it has interrupted the softirq that CPU0 is waiting to finish.
- *
- * The function returns whether it has deactivated a pending timer or not.
- */
-int del_timer_sync(struct timer_list *timer)
-{
-#ifdef CONFIG_LOCKDEP
-       unsigned long flags;
-
-       /*
-        * If lockdep gives a backtrace here, please reference
-        * the synchronization rules above.
-        */
-       local_irq_save(flags);
-       lock_map_acquire(&timer->lockdep_map);
-       lock_map_release(&timer->lockdep_map);
-       local_irq_restore(flags);
-#endif
-       /*
-        * don't use it in hardirq context, because it
-        * could lead to deadlock.
-        */
-       WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
-       for (;;) {
-               int ret = try_to_del_timer_sync(timer);
-               if (ret >= 0)
-                       return ret;
-               cpu_relax();
-       }
-}
-EXPORT_SYMBOL(del_timer_sync);
-#endif
-
-static int cascade(struct tvec_base *base, struct tvec *tv, int index)
-{
-       /* cascade all the timers from tv up one level */
-       struct timer_list *timer, *tmp;
-       struct list_head tv_list;
-
-       list_replace_init(tv->vec + index, &tv_list);
-
-       /*
-        * We are removing _all_ timers from the list, so we
-        * don't have to detach them individually.
-        */
-       list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
-               BUG_ON(tbase_get_base(timer->base) != base);
-               /* No accounting, while moving them */
-               __internal_add_timer(base, timer);
-       }
-
-       return index;
-}
-
-static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
-                         unsigned long data)
-{
-       int count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
-       /*
-        * It is permissible to free the timer from inside the
-        * function that is called from it, this we need to take into
-        * account for lockdep too. To avoid bogus "held lock freed"
-        * warnings as well as problems when looking into
-        * timer->lockdep_map, make a copy and use that here.
-        */
-       struct lockdep_map lockdep_map;
-
-       lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
-#endif
-       /*
-        * Couple the lock chain with the lock chain at
-        * del_timer_sync() by acquiring the lock_map around the fn()
-        * call here and in del_timer_sync().
-        */
-       lock_map_acquire(&lockdep_map);
-
-       trace_timer_expire_entry(timer);
-       fn(data);
-       trace_timer_expire_exit(timer);
-
-       lock_map_release(&lockdep_map);
-
-       if (count != preempt_count()) {
-               WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
-                         fn, count, preempt_count());
-               /*
-                * Restore the preempt count. That gives us a decent
-                * chance to survive and extract information. If the
-                * callback kept a lock held, bad luck, but not worse
-                * than the BUG() we had.
-                */
-               preempt_count_set(count);
-       }
-}
-
-#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
-
-/**
- * __run_timers - run all expired timers (if any) on this CPU.
- * @base: the timer vector to be processed.
- *
- * This function cascades all vectors and executes all expired timer
- * vectors.
- */
-static inline void __run_timers(struct tvec_base *base)
-{
-       struct timer_list *timer;
-
-       spin_lock_irq(&base->lock);
-       if (catchup_timer_jiffies(base)) {
-               spin_unlock_irq(&base->lock);
-               return;
-       }
-       while (time_after_eq(jiffies, base->timer_jiffies)) {
-               struct list_head work_list;
-               struct list_head *head = &work_list;
-               int index = base->timer_jiffies & TVR_MASK;
-
-               /*
-                * Cascade timers:
-                */
-               if (!index &&
-                       (!cascade(base, &base->tv2, INDEX(0))) &&
-                               (!cascade(base, &base->tv3, INDEX(1))) &&
-                                       !cascade(base, &base->tv4, INDEX(2)))
-                       cascade(base, &base->tv5, INDEX(3));
-               ++base->timer_jiffies;
-               list_replace_init(base->tv1.vec + index, head);
-               while (!list_empty(head)) {
-                       void (*fn)(unsigned long);
-                       unsigned long data;
-                       bool irqsafe;
-
-                       timer = list_first_entry(head, struct timer_list,entry);
-                       fn = timer->function;
-                       data = timer->data;
-                       irqsafe = tbase_get_irqsafe(timer->base);
-
-                       timer_stats_account_timer(timer);
-
-                       base->running_timer = timer;
-                       detach_expired_timer(timer, base);
-
-                       if (irqsafe) {
-                               spin_unlock(&base->lock);
-                               call_timer_fn(timer, fn, data);
-                               spin_lock(&base->lock);
-                       } else {
-                               spin_unlock_irq(&base->lock);
-                               call_timer_fn(timer, fn, data);
-                               spin_lock_irq(&base->lock);
-                       }
-               }
-       }
-       base->running_timer = NULL;
-       spin_unlock_irq(&base->lock);
-}
-
-#ifdef CONFIG_NO_HZ_COMMON
-/*
- * Find out when the next timer event is due to happen. This
- * is used on S/390 to stop all activity when a CPU is idle.
- * This function needs to be called with interrupts disabled.
- */
-static unsigned long __next_timer_interrupt(struct tvec_base *base)
-{
-       unsigned long timer_jiffies = base->timer_jiffies;
-       unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
-       int index, slot, array, found = 0;
-       struct timer_list *nte;
-       struct tvec *varray[4];
-
-       /* Look for timer events in tv1. */
-       index = slot = timer_jiffies & TVR_MASK;
-       do {
-               list_for_each_entry(nte, base->tv1.vec + slot, entry) {
-                       if (tbase_get_deferrable(nte->base))
-                               continue;
-
-                       found = 1;
-                       expires = nte->expires;
-                       /* Look at the cascade bucket(s)? */
-                       if (!index || slot < index)
-                               goto cascade;
-                       return expires;
-               }
-               slot = (slot + 1) & TVR_MASK;
-       } while (slot != index);
-
-cascade:
-       /* Calculate the next cascade event */
-       if (index)
-               timer_jiffies += TVR_SIZE - index;
-       timer_jiffies >>= TVR_BITS;
-
-       /* Check tv2-tv5. */
-       varray[0] = &base->tv2;
-       varray[1] = &base->tv3;
-       varray[2] = &base->tv4;
-       varray[3] = &base->tv5;
-
-       for (array = 0; array < 4; array++) {
-               struct tvec *varp = varray[array];
-
-               index = slot = timer_jiffies & TVN_MASK;
-               do {
-                       list_for_each_entry(nte, varp->vec + slot, entry) {
-                               if (tbase_get_deferrable(nte->base))
-                                       continue;
-
-                               found = 1;
-                               if (time_before(nte->expires, expires))
-                                       expires = nte->expires;
-                       }
-                       /*
-                        * Do we still search for the first timer or are
-                        * we looking up the cascade buckets ?
-                        */
-                       if (found) {
-                               /* Look at the cascade bucket(s)? */
-                               if (!index || slot < index)
-                                       break;
-                               return expires;
-                       }
-                       slot = (slot + 1) & TVN_MASK;
-               } while (slot != index);
-
-               if (index)
-                       timer_jiffies += TVN_SIZE - index;
-               timer_jiffies >>= TVN_BITS;
-       }
-       return expires;
-}
-
-/*
- * Check, if the next hrtimer event is before the next timer wheel
- * event:
- */
-static unsigned long cmp_next_hrtimer_event(unsigned long now,
-                                           unsigned long expires)
-{
-       ktime_t hr_delta = hrtimer_get_next_event();
-       struct timespec tsdelta;
-       unsigned long delta;
-
-       if (hr_delta.tv64 == KTIME_MAX)
-               return expires;
-
-       /*
-        * Expired timer available, let it expire in the next tick
-        */
-       if (hr_delta.tv64 <= 0)
-               return now + 1;
-
-       tsdelta = ktime_to_timespec(hr_delta);
-       delta = timespec_to_jiffies(&tsdelta);
-
-       /*
-        * Limit the delta to the max value, which is checked in
-        * tick_nohz_stop_sched_tick():
-        */
-       if (delta > NEXT_TIMER_MAX_DELTA)
-               delta = NEXT_TIMER_MAX_DELTA;
-
-       /*
-        * Take rounding errors in to account and make sure, that it
-        * expires in the next tick. Otherwise we go into an endless
-        * ping pong due to tick_nohz_stop_sched_tick() retriggering
-        * the timer softirq
-        */
-       if (delta < 1)
-               delta = 1;
-       now += delta;
-       if (time_before(now, expires))
-               return now;
-       return expires;
-}
-
-/**
- * get_next_timer_interrupt - return the jiffy of the next pending timer
- * @now: current time (in jiffies)
- */
-unsigned long get_next_timer_interrupt(unsigned long now)
-{
-       struct tvec_base *base = __this_cpu_read(tvec_bases);
-       unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
-
-       /*
-        * Pretend that there is no timer pending if the cpu is offline.
-        * Possible pending timers will be migrated later to an active cpu.
-        */
-       if (cpu_is_offline(smp_processor_id()))
-               return expires;
-
-       spin_lock(&base->lock);
-       if (base->active_timers) {
-               if (time_before_eq(base->next_timer, base->timer_jiffies))
-                       base->next_timer = __next_timer_interrupt(base);
-               expires = base->next_timer;
-       }
-       spin_unlock(&base->lock);
-
-       if (time_before_eq(expires, now))
-               return now;
-
-       return cmp_next_hrtimer_event(now, expires);
-}
-#endif
-
-/*
- * Called from the timer interrupt handler to charge one tick to the current
- * process.  user_tick is 1 if the tick is user time, 0 for system.
- */
-void update_process_times(int user_tick)
-{
-       struct task_struct *p = current;
-       int cpu = smp_processor_id();
-
-       /* Note: this timer irq context must be accounted for as well. */
-       account_process_tick(p, user_tick);
-       run_local_timers();
-       rcu_check_callbacks(cpu, user_tick);
-#ifdef CONFIG_IRQ_WORK
-       if (in_irq())
-               irq_work_run();
-#endif
-       scheduler_tick();
-       run_posix_cpu_timers(p);
-}
-
-/*
- * This function runs timers and the timer-tq in bottom half context.
- */
-static void run_timer_softirq(struct softirq_action *h)
-{
-       struct tvec_base *base = __this_cpu_read(tvec_bases);
-
-       hrtimer_run_pending();
-
-       if (time_after_eq(jiffies, base->timer_jiffies))
-               __run_timers(base);
-}
-
-/*
- * Called by the local, per-CPU timer interrupt on SMP.
- */
-void run_local_timers(void)
-{
-       hrtimer_run_queues();
-       raise_softirq(TIMER_SOFTIRQ);
-}
-
-#ifdef __ARCH_WANT_SYS_ALARM
-
-/*
- * For backwards compatibility?  This can be done in libc so Alpha
- * and all newer ports shouldn't need it.
- */
-SYSCALL_DEFINE1(alarm, unsigned int, seconds)
-{
-       return alarm_setitimer(seconds);
-}
-
-#endif
-
-static void process_timeout(unsigned long __data)
-{
-       wake_up_process((struct task_struct *)__data);
-}
-
-/**
- * schedule_timeout - sleep until timeout
- * @timeout: timeout value in jiffies
- *
- * Make the current task sleep until @timeout jiffies have
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
- * pass before the routine returns. The routine will return 0
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task. In this case the remaining time
- * in jiffies will be returned, or 0 if the timer expired in time
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
- * the CPU away without a bound on the timeout. In this case the return
- * value will be %MAX_SCHEDULE_TIMEOUT.
- *
- * In all cases the return value is guaranteed to be non-negative.
- */
-signed long __sched schedule_timeout(signed long timeout)
-{
-       struct timer_list timer;
-       unsigned long expire;
-
-       switch (timeout)
-       {
-       case MAX_SCHEDULE_TIMEOUT:
-               /*
-                * These two special cases are useful to be comfortable
-                * in the caller. Nothing more. We could take
-                * MAX_SCHEDULE_TIMEOUT from one of the negative value
-                * but I' d like to return a valid offset (>=0) to allow
-                * the caller to do everything it want with the retval.
-                */
-               schedule();
-               goto out;
-       default:
-               /*
-                * Another bit of PARANOID. Note that the retval will be
-                * 0 since no piece of kernel is supposed to do a check
-                * for a negative retval of schedule_timeout() (since it
-                * should never happens anyway). You just have the printk()
-                * that will tell you if something is gone wrong and where.
-                */
-               if (timeout < 0) {
-                       printk(KERN_ERR "schedule_timeout: wrong timeout "
-                               "value %lx\n", timeout);
-                       dump_stack();
-                       current->state = TASK_RUNNING;
-                       goto out;
-               }
-       }
-
-       expire = timeout + jiffies;
-
-       setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-       __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
-       schedule();
-       del_singleshot_timer_sync(&timer);
-
-       /* Remove the timer from the object tracker */
-       destroy_timer_on_stack(&timer);
-
-       timeout = expire - jiffies;
-
- out:
-       return timeout < 0 ? 0 : timeout;
-}
-EXPORT_SYMBOL(schedule_timeout);
-
-/*
- * We can use __set_current_state() here because schedule_timeout() calls
- * schedule() unconditionally.
- */
-signed long __sched schedule_timeout_interruptible(signed long timeout)
-{
-       __set_current_state(TASK_INTERRUPTIBLE);
-       return schedule_timeout(timeout);
-}
-EXPORT_SYMBOL(schedule_timeout_interruptible);
-
-signed long __sched schedule_timeout_killable(signed long timeout)
-{
-       __set_current_state(TASK_KILLABLE);
-       return schedule_timeout(timeout);
-}
-EXPORT_SYMBOL(schedule_timeout_killable);
-
-signed long __sched schedule_timeout_uninterruptible(signed long timeout)
-{
-       __set_current_state(TASK_UNINTERRUPTIBLE);
-       return schedule_timeout(timeout);
-}
-EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-
-static int init_timers_cpu(int cpu)
-{
-       int j;
-       struct tvec_base *base;
-       static char tvec_base_done[NR_CPUS];
-
-       if (!tvec_base_done[cpu]) {
-               static char boot_done;
-
-               if (boot_done) {
-                       /*
-                        * The APs use this path later in boot
-                        */
-                       base = kzalloc_node(sizeof(*base), GFP_KERNEL,
-                                           cpu_to_node(cpu));
-                       if (!base)
-                               return -ENOMEM;
-
-                       /* Make sure tvec_base has TIMER_FLAG_MASK bits free */
-                       if (WARN_ON(base != tbase_get_base(base))) {
-                               kfree(base);
-                               return -ENOMEM;
-                       }
-                       per_cpu(tvec_bases, cpu) = base;
-               } else {
-                       /*
-                        * This is for the boot CPU - we use compile-time
-                        * static initialisation because per-cpu memory isn't
-                        * ready yet and because the memory allocators are not
-                        * initialised either.
-                        */
-                       boot_done = 1;
-                       base = &boot_tvec_bases;
-               }
-               spin_lock_init(&base->lock);
-               tvec_base_done[cpu] = 1;
-       } else {
-               base = per_cpu(tvec_bases, cpu);
-       }
-
-
-       for (j = 0; j < TVN_SIZE; j++) {
-               INIT_LIST_HEAD(base->tv5.vec + j);
-               INIT_LIST_HEAD(base->tv4.vec + j);
-               INIT_LIST_HEAD(base->tv3.vec + j);
-               INIT_LIST_HEAD(base->tv2.vec + j);
-       }
-       for (j = 0; j < TVR_SIZE; j++)
-               INIT_LIST_HEAD(base->tv1.vec + j);
-
-       base->timer_jiffies = jiffies;
-       base->next_timer = base->timer_jiffies;
-       base->active_timers = 0;
-       base->all_timers = 0;
-       return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
-{
-       struct timer_list *timer;
-
-       while (!list_empty(head)) {
-               timer = list_first_entry(head, struct timer_list, entry);
-               /* We ignore the accounting on the dying cpu */
-               detach_timer(timer, false);
-               timer_set_base(timer, new_base);
-               internal_add_timer(new_base, timer);
-       }
-}
-
-static void migrate_timers(int cpu)
-{
-       struct tvec_base *old_base;
-       struct tvec_base *new_base;
-       int i;
-
-       BUG_ON(cpu_online(cpu));
-       old_base = per_cpu(tvec_bases, cpu);
-       new_base = get_cpu_var(tvec_bases);
-       /*
-        * The caller is globally serialized and nobody else
-        * takes two locks at once, deadlock is not possible.
-        */
-       spin_lock_irq(&new_base->lock);
-       spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
-
-       BUG_ON(old_base->running_timer);
-
-       for (i = 0; i < TVR_SIZE; i++)
-               migrate_timer_list(new_base, old_base->tv1.vec + i);
-       for (i = 0; i < TVN_SIZE; i++) {
-               migrate_timer_list(new_base, old_base->tv2.vec + i);
-               migrate_timer_list(new_base, old_base->tv3.vec + i);
-               migrate_timer_list(new_base, old_base->tv4.vec + i);
-               migrate_timer_list(new_base, old_base->tv5.vec + i);
-       }
-
-       spin_unlock(&old_base->lock);
-       spin_unlock_irq(&new_base->lock);
-       put_cpu_var(tvec_bases);
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-static int timer_cpu_notify(struct notifier_block *self,
-                               unsigned long action, void *hcpu)
-{
-       long cpu = (long)hcpu;
-       int err;
-
-       switch(action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               err = init_timers_cpu(cpu);
-               if (err < 0)
-                       return notifier_from_errno(err);
-               break;
-#ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               migrate_timers(cpu);
-               break;
-#endif
-       default:
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block timers_nb = {
-       .notifier_call  = timer_cpu_notify,
-};
-
-
-void __init init_timers(void)
-{
-       int err;
-
-       /* ensure there are enough low bits for flags in timer->base pointer */
-       BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
-
-       err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
-                              (void *)(long)smp_processor_id());
-       BUG_ON(err != NOTIFY_OK);
-
-       init_timer_stats();
-       register_cpu_notifier(&timers_nb);
-       open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
-}
-
-/**
- * msleep - sleep safely even with waitqueue interruptions
- * @msecs: Time in milliseconds to sleep for
- */
-void msleep(unsigned int msecs)
-{
-       unsigned long timeout = msecs_to_jiffies(msecs) + 1;
-
-       while (timeout)
-               timeout = schedule_timeout_uninterruptible(timeout);
-}
-
-EXPORT_SYMBOL(msleep);
-
-/**
- * msleep_interruptible - sleep waiting for signals
- * @msecs: Time in milliseconds to sleep for
- */
-unsigned long msleep_interruptible(unsigned int msecs)
-{
-       unsigned long timeout = msecs_to_jiffies(msecs) + 1;
-
-       while (timeout && !signal_pending(current))
-               timeout = schedule_timeout_interruptible(timeout);
-       return jiffies_to_msecs(timeout);
-}
-
-EXPORT_SYMBOL(msleep_interruptible);
-
-static int __sched do_usleep_range(unsigned long min, unsigned long max)
-{
-       ktime_t kmin;
-       unsigned long delta;
-
-       kmin = ktime_set(0, min * NSEC_PER_USEC);
-       delta = (max - min) * NSEC_PER_USEC;
-       return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
-}
-
-/**
- * usleep_range - Drop in replacement for udelay where wakeup is flexible
- * @min: Minimum time in usecs to sleep
- * @max: Maximum time in usecs to sleep
- */
-void usleep_range(unsigned long min, unsigned long max)
-{
-       __set_current_state(TASK_UNINTERRUPTIBLE);
-       do_usleep_range(min, max);
-}
-EXPORT_SYMBOL(usleep_range);