kernel/hrtimer.c

1/*
2 * linux/kernel/hrtimer.c
3 *
4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
6 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
7 *
8 * High-resolution kernel timers
9 *
10 * In contrast to the low-resolution timeout API implemented in
11 * kernel/timer.c, hrtimers provide finer resolution and accuracy
12 * depending on system configuration and capabilities.
13 *
14 * These timers are currently used for:
15 * - itimers
16 * - POSIX timers
17 * - nanosleep
18 * - precise in-kernel timing
19 *
20 * Started by: Thomas Gleixner and Ingo Molnar
21 *
22 * Credits:
23 * based on kernel/timer.c
24 *
25 * Help, testing, suggestions, bugfixes, improvements were
26 * provided by:
27 *
28 * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
29 * et al.
30 *
31 * For licensing details see kernel-base/COPYING
32 */
33
34#include <linux/cpu.h>
35#include <linux/export.h>
36#include <linux/percpu.h>
37#include <linux/hrtimer.h>
38#include <linux/notifier.h>
39#include <linux/syscalls.h>
40#include <linux/kallsyms.h>
41#include <linux/interrupt.h>
42#include <linux/tick.h>
43#include <linux/seq_file.h>
44#include <linux/err.h>
45#include <linux/debugobjects.h>
46#include <linux/sched.h>
47#include <linux/timer.h>
48
49#include <asm/uaccess.h>
50
51#include <trace/events/timer.h>
52
53/*
54 * The timer bases:
55 *
56 * There are more clockids than hrtimer bases. Thus, we index
57 * into the timer bases by the hrtimer_base_type enum. When trying
58 * to reach a base using a clockid, hrtimer_clockid_to_base()
59 * is used to convert from clockid to the proper hrtimer_base_type.
60 */
61DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
62{
63
64    .clock_base =
65    {
66        {
67            .index = HRTIMER_BASE_MONOTONIC,
68            .clockid = CLOCK_MONOTONIC,
69            .get_time = &ktime_get,
70            .resolution = KTIME_LOW_RES,
71        },
72        {
73            .index = HRTIMER_BASE_REALTIME,
74            .clockid = CLOCK_REALTIME,
75            .get_time = &ktime_get_real,
76            .resolution = KTIME_LOW_RES,
77        },
78        {
79            .index = HRTIMER_BASE_BOOTTIME,
80            .clockid = CLOCK_BOOTTIME,
81            .get_time = &ktime_get_boottime,
82            .resolution = KTIME_LOW_RES,
83        },
84    }
85};
86
87static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
88    [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
89    [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
90    [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
91};
92
93static inline int hrtimer_clockid_to_base(clockid_t clock_id)
94{
95    return hrtimer_clock_to_base_table[clock_id];
96}
97
98
99/*
100 * Get the coarse-grained time for the softirq, based on xtime and
101 * wall_to_monotonic.
102 */
103static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
104{
105    ktime_t xtim, mono, boot;
106    struct timespec xts, tom, slp;
107
108    get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
109
110    xtim = timespec_to_ktime(xts);
111    mono = ktime_add(xtim, timespec_to_ktime(tom));
112    boot = ktime_add(mono, timespec_to_ktime(slp));
113    base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
114    base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
115    base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
116}
117
118/*
119 * Functions and macros which are different for UP/SMP systems are kept in a
120 * single place
121 */
122#ifdef CONFIG_SMP
123
124/*
125 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
126 * means that all timers which are tied to this base via timer->base are
127 * locked, and the base itself is locked too.
128 *
129 * So __run_timers/migrate_timers can safely modify all timers which could
130 * be found on the lists/queues.
131 *
132 * When the timer's base is locked, and the timer removed from list, it is
133 * possible to set timer->base = NULL and drop the lock: the timer remains
134 * locked.
135 */
136static
137struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
138                         unsigned long *flags)
139{
140    struct hrtimer_clock_base *base;
141
142    for (;;) {
143        base = timer->base;
144        if (likely(base != NULL)) {
145            raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
146            if (likely(base == timer->base))
147                return base;
148            /* The timer has migrated to another CPU: */
149            raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
150        }
151        cpu_relax();
152    }
153}
154
155
156/*
157 * Get the preferred target CPU for NOHZ
158 */
159static int hrtimer_get_target(int this_cpu, int pinned)
160{
161#ifdef CONFIG_NO_HZ
162    if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
163        return get_nohz_timer_target();
164#endif
165    return this_cpu;
166}
167
168/*
169 * With HIGHRES=y we do not migrate the timer when it is expiring
170 * before the next event on the target cpu because we cannot reprogram
171 * the target cpu hardware and we would cause it to fire late.
172 *
173 * Called with cpu_base->lock of target cpu held.
174 */
175static int
176hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
177{
178#ifdef CONFIG_HIGH_RES_TIMERS
179    ktime_t expires;
180
181    if (!new_base->cpu_base->hres_active)
182        return 0;
183
184    expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
185    return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
186#else
187    return 0;
188#endif
189}
190
191/*
192 * Switch the timer base to the current CPU when possible.
193 */
194static inline struct hrtimer_clock_base *
195switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
196            int pinned)
197{
198    struct hrtimer_clock_base *new_base;
199    struct hrtimer_cpu_base *new_cpu_base;
200    int this_cpu = smp_processor_id();
201    int cpu = hrtimer_get_target(this_cpu, pinned);
202    int basenum = base->index;
203
204again:
205    new_cpu_base = &per_cpu(hrtimer_bases, cpu);
206    new_base = &new_cpu_base->clock_base[basenum];
207
208    if (base != new_base) {
209        /*
210         * We are trying to move the timer to new_base.
211         * However we can't change the timer's base while it is running,
212         * so we keep it on the same CPU. No hassle vs. reprogramming
213         * the event source in the high resolution case. The softirq
214         * code will take care of this when the timer function has
215         * completed. There is no conflict as we hold the lock until
216         * the timer is enqueued.
217         */
218        if (unlikely(hrtimer_callback_running(timer)))
219            return base;
220
221        /* See the comment in lock_timer_base() */
222        timer->base = NULL;
223        raw_spin_unlock(&base->cpu_base->lock);
224        raw_spin_lock(&new_base->cpu_base->lock);
225
226        if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
227            cpu = this_cpu;
228            raw_spin_unlock(&new_base->cpu_base->lock);
229            raw_spin_lock(&base->cpu_base->lock);
230            timer->base = base;
231            goto again;
232        }
233        timer->base = new_base;
234    }
235    return new_base;
236}
237
238#else /* CONFIG_SMP */
239
240static inline struct hrtimer_clock_base *
241lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
242{
243    struct hrtimer_clock_base *base = timer->base;
244
245    raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
246
247    return base;
248}
249
250# define switch_hrtimer_base(t, b, p) (b)
251
252#endif /* !CONFIG_SMP */
253
254/*
255 * Functions for the union type storage format of ktime_t which are
256 * too large for inlining:
257 */
258#if BITS_PER_LONG < 64
259# ifndef CONFIG_KTIME_SCALAR
260/**
261 * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
262 * @kt: addend
263 * @nsec: the scalar nsec value to add
264 *
265 * Returns the sum of kt and nsec in ktime_t format
266 */
267ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
268{
269    ktime_t tmp;
270
271    if (likely(nsec < NSEC_PER_SEC)) {
272        tmp.tv64 = nsec;
273    } else {
274        unsigned long rem = do_div(nsec, NSEC_PER_SEC);
275
276        tmp = ktime_set((long)nsec, rem);
277    }
278
279    return ktime_add(kt, tmp);
280}
281
282EXPORT_SYMBOL_GPL(ktime_add_ns);
283
284/**
285 * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
286 * @kt: minuend
287 * @nsec: the scalar nsec value to subtract
288 *
289 * Returns the subtraction of @nsec from @kt in ktime_t format
290 */
291ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
292{
293    ktime_t tmp;
294
295    if (likely(nsec < NSEC_PER_SEC)) {
296        tmp.tv64 = nsec;
297    } else {
298        unsigned long rem = do_div(nsec, NSEC_PER_SEC);
299
300        tmp = ktime_set((long)nsec, rem);
301    }
302
303    return ktime_sub(kt, tmp);
304}
305
306EXPORT_SYMBOL_GPL(ktime_sub_ns);
307# endif /* !CONFIG_KTIME_SCALAR */
308
309/*
310 * Divide a ktime value by a nanosecond value
311 */
312u64 ktime_divns(const ktime_t kt, s64 div)
313{
314    u64 dclc;
315    int sft = 0;
316
317    dclc = ktime_to_ns(kt);
318    /* Make sure the divisor is less than 2^32: */
319    while (div >> 32) {
320        sft++;
321        div >>= 1;
322    }
323    dclc >>= sft;
324    do_div(dclc, (unsigned long) div);
325
326    return dclc;
327}
328#endif /* BITS_PER_LONG < 64 */
329
330/*
331 * Add two ktime values and do a safety check for overflow:
332 */
333ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
334{
335    ktime_t res = ktime_add(lhs, rhs);
336
337    /*
338     * We use KTIME_SEC_MAX here, the maximum timeout which we can
339     * return to user space in a timespec:
340     */
341    if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
342        res = ktime_set(KTIME_SEC_MAX, 0);
343
344    return res;
345}
346
347EXPORT_SYMBOL_GPL(ktime_add_safe);
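
/*
 * Illustrative usage sketch: ktime_add_safe() clamps to KTIME_SEC_MAX
 * seconds instead of wrapping, so adding a huge user supplied relative
 * timeout to the current time cannot yield a negative (wrapped) expiry.
 * The function name below is invented for the example.
 */
#if 0
static ktime_t demo_absolute_expiry(ktime_t huge_user_timeout)
{
	/* Same pattern as __hrtimer_start_range_ns() uses for relative timers */
	return ktime_add_safe(huge_user_timeout, ktime_get());
}
#endif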
348
349#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
350
351static struct debug_obj_descr hrtimer_debug_descr;
352
353static void *hrtimer_debug_hint(void *addr)
354{
355    return ((struct hrtimer *) addr)->function;
356}
357
358/*
359 * fixup_init is called when:
360 * - an active object is initialized
361 */
362static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
363{
364    struct hrtimer *timer = addr;
365
366    switch (state) {
367    case ODEBUG_STATE_ACTIVE:
368        hrtimer_cancel(timer);
369        debug_object_init(timer, &hrtimer_debug_descr);
370        return 1;
371    default:
372        return 0;
373    }
374}
375
376/*
377 * fixup_activate is called when:
378 * - an active object is activated
379 * - an unknown object is activated (might be a statically initialized object)
380 */
381static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
382{
383    switch (state) {
384
385    case ODEBUG_STATE_NOTAVAILABLE:
386        WARN_ON_ONCE(1);
387        return 0;
388
389    case ODEBUG_STATE_ACTIVE:
390        WARN_ON(1);
391
392    default:
393        return 0;
394    }
395}
396
397/*
398 * fixup_free is called when:
399 * - an active object is freed
400 */
401static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
402{
403    struct hrtimer *timer = addr;
404
405    switch (state) {
406    case ODEBUG_STATE_ACTIVE:
407        hrtimer_cancel(timer);
408        debug_object_free(timer, &hrtimer_debug_descr);
409        return 1;
410    default:
411        return 0;
412    }
413}
414
415static struct debug_obj_descr hrtimer_debug_descr = {
416    .name = "hrtimer",
417    .debug_hint = hrtimer_debug_hint,
418    .fixup_init = hrtimer_fixup_init,
419    .fixup_activate = hrtimer_fixup_activate,
420    .fixup_free = hrtimer_fixup_free,
421};
422
423static inline void debug_hrtimer_init(struct hrtimer *timer)
424{
425    debug_object_init(timer, &hrtimer_debug_descr);
426}
427
428static inline void debug_hrtimer_activate(struct hrtimer *timer)
429{
430    debug_object_activate(timer, &hrtimer_debug_descr);
431}
432
433static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
434{
435    debug_object_deactivate(timer, &hrtimer_debug_descr);
436}
437
438static inline void debug_hrtimer_free(struct hrtimer *timer)
439{
440    debug_object_free(timer, &hrtimer_debug_descr);
441}
442
443static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
444               enum hrtimer_mode mode);
445
446void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
447               enum hrtimer_mode mode)
448{
449    debug_object_init_on_stack(timer, &hrtimer_debug_descr);
450    __hrtimer_init(timer, clock_id, mode);
451}
452EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
453
454void destroy_hrtimer_on_stack(struct hrtimer *timer)
455{
456    debug_object_free(timer, &hrtimer_debug_descr);
457}
458
459#else
460static inline void debug_hrtimer_init(struct hrtimer *timer) { }
461static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
462static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
463#endif
464
465static inline void
466debug_init(struct hrtimer *timer, clockid_t clockid,
467       enum hrtimer_mode mode)
468{
469    debug_hrtimer_init(timer);
470    trace_hrtimer_init(timer, clockid, mode);
471}
472
473static inline void debug_activate(struct hrtimer *timer)
474{
475    debug_hrtimer_activate(timer);
476    trace_hrtimer_start(timer);
477}
478
479static inline void debug_deactivate(struct hrtimer *timer)
480{
481    debug_hrtimer_deactivate(timer);
482    trace_hrtimer_cancel(timer);
483}
484
485/* High resolution timer related functions */
486#ifdef CONFIG_HIGH_RES_TIMERS
487
488/*
489 * High resolution timer enabled ?
490 */
491static int hrtimer_hres_enabled __read_mostly = 1;
492
493/*
494 * Enable / Disable high resolution mode
495 */
496static int __init setup_hrtimer_hres(char *str)
497{
498    if (!strcmp(str, "off"))
499        hrtimer_hres_enabled = 0;
500    else if (!strcmp(str, "on"))
501        hrtimer_hres_enabled = 1;
502    else
503        return 0;
504    return 1;
505}
506
507__setup("highres=", setup_hrtimer_hres);
508
509/*
510 * hrtimer_is_hres_enabled - query whether the highres mode is enabled
511 */
512static inline int hrtimer_is_hres_enabled(void)
513{
514    return hrtimer_hres_enabled;
515}
516
517/*
518 * Is the high resolution mode active ?
519 */
520static inline int hrtimer_hres_active(void)
521{
522    return __this_cpu_read(hrtimer_bases.hres_active);
523}
524
525/*
526 * Reprogram the event source, checking all clock bases for the
527 * next expiry event.
528 * Called with interrupts disabled and base->lock held
529 */
530static void
531hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
532{
533    int i;
534    struct hrtimer_clock_base *base = cpu_base->clock_base;
535    ktime_t expires, expires_next;
536
537    expires_next.tv64 = KTIME_MAX;
538
539    for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
540        struct hrtimer *timer;
541        struct timerqueue_node *next;
542
543        next = timerqueue_getnext(&base->active);
544        if (!next)
545            continue;
546        timer = container_of(next, struct hrtimer, node);
547
548        expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
549        /*
550         * clock_was_set() has changed base->offset so the
551         * result might be negative. Fix it up to prevent a
552         * false positive in clockevents_program_event()
553         */
554        if (expires.tv64 < 0)
555            expires.tv64 = 0;
556        if (expires.tv64 < expires_next.tv64)
557            expires_next = expires;
558    }
559
560    if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
561        return;
562
563    cpu_base->expires_next.tv64 = expires_next.tv64;
564
565    if (cpu_base->expires_next.tv64 != KTIME_MAX)
566        tick_program_event(cpu_base->expires_next, 1);
567}
568
569/*
570 * Shared reprogramming for clock_realtime and clock_monotonic
571 *
572 * When a timer is enqueued and expires earlier than the already enqueued
573 * timers, we have to check whether it expires earlier than the timer for
574 * which the clock event device was armed.
575 *
576 * Called with interrupts disabled and base->cpu_base.lock held
577 */
578static int hrtimer_reprogram(struct hrtimer *timer,
579                 struct hrtimer_clock_base *base)
580{
581    struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
582    ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
583    int res;
584
585    WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
586
587    /*
588     * When the callback is running, we do not reprogram the clock event
589     * device. The timer callback is either running on a different CPU or
590     * the callback is executed in the hrtimer_interrupt context. The
591     * reprogramming is handled either by the softirq, which called the
592     * callback or at the end of the hrtimer_interrupt.
593     */
594    if (hrtimer_callback_running(timer))
595        return 0;
596
597    /*
598     * CLOCK_REALTIME timer might be requested with an absolute
599     * expiry time which is less than base->offset. Nothing wrong
600     * about that, just avoid calling into the tick code, which
601     * now has objections against negative expiry values.
602     */
603    if (expires.tv64 < 0)
604        return -ETIME;
605
606    if (expires.tv64 >= cpu_base->expires_next.tv64)
607        return 0;
608
609    /*
610     * If a hang was detected in the last timer interrupt then we
611     * do not schedule a timer which is earlier than the expiry
612     * which we enforced in the hang detection. We want the system
613     * to make progress.
614     */
615    if (cpu_base->hang_detected)
616        return 0;
617
618    /*
619     * Clockevents returns -ETIME, when the event was in the past.
620     */
621    res = tick_program_event(expires, 0);
622    if (!IS_ERR_VALUE(res))
623        cpu_base->expires_next = expires;
624    return res;
625}
626
627/*
628 * Initialize the high resolution related parts of cpu_base
629 */
630static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
631{
632    base->expires_next.tv64 = KTIME_MAX;
633    base->hres_active = 0;
634}
635
636/*
637 * When high resolution timers are active, try to reprogram. Note that in case
638 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
639 * check happens. The timer gets enqueued into the rbtree. The reprogramming
640 * and expiry check is done in the hrtimer_interrupt or in the softirq.
641 */
642static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
643                        struct hrtimer_clock_base *base,
644                        int wakeup)
645{
646    if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
647        if (wakeup) {
648            raw_spin_unlock(&base->cpu_base->lock);
649            raise_softirq_irqoff(HRTIMER_SOFTIRQ);
650            raw_spin_lock(&base->cpu_base->lock);
651        } else
652            __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
653
654        return 1;
655    }
656
657    return 0;
658}
659
660static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
661{
662    ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
663    ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
664
665    return ktime_get_update_offsets(offs_real, offs_boot);
666}
667
668/*
669 * retrigger_next_event() is called after the clock was set
670 *
671 * Called with interrupts disabled via on_each_cpu()
672 */
673static void retrigger_next_event(void *arg)
674{
675    struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
676
677    if (!hrtimer_hres_active())
678        return;
679
680    raw_spin_lock(&base->lock);
681    hrtimer_update_base(base);
682    hrtimer_force_reprogram(base, 0);
683    raw_spin_unlock(&base->lock);
684}
685
686/*
687 * Switch to high resolution mode
688 */
689static int hrtimer_switch_to_hres(void)
690{
691    int i, cpu = smp_processor_id();
692    struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
693    unsigned long flags;
694
695    if (base->hres_active)
696        return 1;
697
698    local_irq_save(flags);
699
700    if (tick_init_highres()) {
701        local_irq_restore(flags);
702        printk(KERN_WARNING "Could not switch to high resolution "
703                    "mode on CPU %d\n", cpu);
704        return 0;
705    }
706    base->hres_active = 1;
707    for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
708        base->clock_base[i].resolution = KTIME_HIGH_RES;
709
710    tick_setup_sched_timer();
711    /* "Retrigger" the interrupt to get things going */
712    retrigger_next_event(NULL);
713    local_irq_restore(flags);
714    return 1;
715}
716
717/*
718 * Called from timekeeping code to reprogram the hrtimer interrupt
719 * device. If called from the timer interrupt context we defer it to
720 * softirq context.
721 */
722void clock_was_set_delayed(void)
723{
724    struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
725
726    cpu_base->clock_was_set = 1;
727    __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
728}
729
730#else
731
732static inline int hrtimer_hres_active(void) { return 0; }
733static inline int hrtimer_is_hres_enabled(void) { return 0; }
734static inline int hrtimer_switch_to_hres(void) { return 0; }
735static inline void
736hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
737static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
738                        struct hrtimer_clock_base *base,
739                        int wakeup)
740{
741    return 0;
742}
743static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
744static inline void retrigger_next_event(void *arg) { }
745
746#endif /* CONFIG_HIGH_RES_TIMERS */
747
748/*
749 * Clock realtime was set
750 *
751 * Change the offset of the realtime clock vs. the monotonic
752 * clock.
753 *
754 * We might have to reprogram the high resolution timer interrupt. On
755 * SMP we call the architecture specific code to retrigger _all_ high
756 * resolution timer interrupts. On UP we just disable interrupts and
757 * call the high resolution interrupt code.
758 */
759void clock_was_set(void)
760{
761#ifdef CONFIG_HIGH_RES_TIMERS
762    /* Retrigger the CPU local events everywhere */
763    on_each_cpu(retrigger_next_event, NULL, 1);
764#endif
765    timerfd_clock_was_set();
766}
767
768/*
769 * During resume we might have to reprogram the high resolution timer
770 * interrupt (on the local CPU):
771 */
772void hrtimers_resume(void)
773{
774    WARN_ONCE(!irqs_disabled(),
775          KERN_INFO "hrtimers_resume() called with IRQs enabled!");
776
777    retrigger_next_event(NULL);
778    timerfd_clock_was_set();
779}
780
781static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
782{
783#ifdef CONFIG_TIMER_STATS
784    if (timer->start_site)
785        return;
786    timer->start_site = __builtin_return_address(0);
787    memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
788    timer->start_pid = current->pid;
789#endif
790}
791
792static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
793{
794#ifdef CONFIG_TIMER_STATS
795    timer->start_site = NULL;
796#endif
797}
798
799static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
800{
801#ifdef CONFIG_TIMER_STATS
802    if (likely(!timer_stats_active))
803        return;
804    timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
805                 timer->function, timer->start_comm, 0);
806#endif
807}
808
809/*
810 * Counterpart to lock_hrtimer_base above:
811 */
812static inline
813void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
814{
815    raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
816}
817
818/**
819 * hrtimer_forward - forward the timer expiry
820 * @timer: hrtimer to forward
821 * @now: forward past this time
822 * @interval: the interval to forward
823 *
824 * Forward the timer expiry so it will expire in the future.
825 * Returns the number of overruns.
826 */
827u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
828{
829    u64 orun = 1;
830    ktime_t delta;
831
832    delta = ktime_sub(now, hrtimer_get_expires(timer));
833
834    if (delta.tv64 < 0)
835        return 0;
836
837    if (interval.tv64 < timer->base->resolution.tv64)
838        interval.tv64 = timer->base->resolution.tv64;
839
840    if (unlikely(delta.tv64 >= interval.tv64)) {
841        s64 incr = ktime_to_ns(interval);
842
843        orun = ktime_divns(delta, incr);
844        hrtimer_add_expires_ns(timer, incr * orun);
845        if (hrtimer_get_expires_tv64(timer) > now.tv64)
846            return orun;
847        /*
848         * This (and the ktime_add() below) is the
849         * correction for exact:
850         */
851        orun++;
852    }
853    hrtimer_add_expires(timer, interval);
854
855    return orun;
856}
857EXPORT_SYMBOL_GPL(hrtimer_forward);
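
/*
 * Illustrative usage sketch: a periodic timer is typically built by
 * pushing the expiry forward from the callback, here via the
 * hrtimer_forward_now() helper from <linux/hrtimer.h>, and returning
 * HRTIMER_RESTART. The 100ms period and the function name are invented
 * for the example.
 */
#if 0
static enum hrtimer_restart demo_periodic_cb(struct hrtimer *timer)
{
	/* Advance the expiry by whole periods past "now" */
	hrtimer_forward_now(timer, ktime_set(0, 100 * NSEC_PER_MSEC));
	return HRTIMER_RESTART;
}
#endif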
858
859/*
860 * enqueue_hrtimer - internal function to (re)start a timer
861 *
862 * The timer is inserted in expiry order. Insertion into the
863 * red black tree is O(log(n)). Must hold the base lock.
864 *
865 * Returns 1 when the new timer is the leftmost timer in the tree.
866 */
867static int enqueue_hrtimer(struct hrtimer *timer,
868               struct hrtimer_clock_base *base)
869{
870    debug_activate(timer);
871
872    timerqueue_add(&base->active, &timer->node);
873    base->cpu_base->active_bases |= 1 << base->index;
874
875    /*
876     * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
877     * state of a possibly running callback.
878     */
879    timer->state |= HRTIMER_STATE_ENQUEUED;
880
881    return (&timer->node == base->active.next);
882}
883
884/*
885 * __remove_hrtimer - internal function to remove a timer
886 *
887 * Caller must hold the base lock.
888 *
889 * High resolution timer mode reprograms the clock event device when the
890 * timer is the one which expires next. The caller can disable this by setting
891 * reprogram to zero. This is useful when the context does a reprogramming
892 * anyway (e.g. the timer interrupt).
893 */
894static void __remove_hrtimer(struct hrtimer *timer,
895                 struct hrtimer_clock_base *base,
896                 unsigned long newstate, int reprogram)
897{
898    struct timerqueue_node *next_timer;
899    if (!(timer->state & HRTIMER_STATE_ENQUEUED))
900        goto out;
901
902    next_timer = timerqueue_getnext(&base->active);
903    timerqueue_del(&base->active, &timer->node);
904    if (&timer->node == next_timer) {
905#ifdef CONFIG_HIGH_RES_TIMERS
906        /* Reprogram the clock event device, if enabled */
907        if (reprogram && hrtimer_hres_active()) {
908            ktime_t expires;
909
910            expires = ktime_sub(hrtimer_get_expires(timer),
911                        base->offset);
912            if (base->cpu_base->expires_next.tv64 == expires.tv64)
913                hrtimer_force_reprogram(base->cpu_base, 1);
914        }
915#endif
916    }
917    if (!timerqueue_getnext(&base->active))
918        base->cpu_base->active_bases &= ~(1 << base->index);
919out:
920    timer->state = newstate;
921}
922
923/*
924 * remove hrtimer, called with base lock held
925 */
926static inline int
927remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
928{
929    if (hrtimer_is_queued(timer)) {
930        unsigned long state;
931        int reprogram;
932
933        /*
934         * Remove the timer and force reprogramming when high
935         * resolution mode is active and the timer is on the current
936         * CPU. If we remove a timer on another CPU, reprogramming is
937         * skipped. The interrupt event on that CPU is fired and
938         * reprogramming happens in its interrupt handler. This is a
939         * rare case and less expensive than an SMP call.
940         */
941        debug_deactivate(timer);
942        timer_stats_hrtimer_clear_start_info(timer);
943        reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
944        /*
945         * We must preserve the CALLBACK state flag here,
946         * otherwise we could move the timer base in
947         * switch_hrtimer_base.
948         */
949        state = timer->state & HRTIMER_STATE_CALLBACK;
950        __remove_hrtimer(timer, base, state, reprogram);
951        return 1;
952    }
953    return 0;
954}
955
956int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
957        unsigned long delta_ns, const enum hrtimer_mode mode,
958        int wakeup)
959{
960    struct hrtimer_clock_base *base, *new_base;
961    unsigned long flags;
962    int ret, leftmost;
963
964    base = lock_hrtimer_base(timer, &flags);
965
966    /* Remove an active timer from the queue: */
967    ret = remove_hrtimer(timer, base);
968
969    /* Switch the timer base, if necessary: */
970    new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
971
972    if (mode & HRTIMER_MODE_REL) {
973        tim = ktime_add_safe(tim, new_base->get_time());
974        /*
975         * CONFIG_TIME_LOW_RES is a temporary way for architectures
976         * to signal that they simply return xtime in
977         * do_gettimeoffset(). In this case we want to round up by
978         * resolution when starting a relative timer, to avoid short
979         * timeouts. This will go away with the GTOD framework.
980         */
981#ifdef CONFIG_TIME_LOW_RES
982        tim = ktime_add_safe(tim, base->resolution);
983#endif
984    }
985
986    hrtimer_set_expires_range_ns(timer, tim, delta_ns);
987
988    timer_stats_hrtimer_set_start_info(timer);
989
990    leftmost = enqueue_hrtimer(timer, new_base);
991
992    /*
993     * Only allow reprogramming if the new base is on this CPU.
994     * (it might still be on another CPU if the timer was pending)
995     *
996     * XXX send_remote_softirq() ?
997     */
998    if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
999        hrtimer_enqueue_reprogram(timer, new_base, wakeup);
1000
1001    unlock_hrtimer_base(timer, &flags);
1002
1003    return ret;
1004}
1005
1006/**
1007 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
1008 * @timer: the timer to be added
1009 * @tim: expiry time
1010 * @delta_ns: "slack" range for the timer
1011 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or relative (HRTIMER_MODE_REL)
1012 *
1013 * Returns:
1014 * 0 on success
1015 * 1 when the timer was active
1016 */
1017int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1018        unsigned long delta_ns, const enum hrtimer_mode mode)
1019{
1020    return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
1021}
1022EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
1023
1024/**
1025 * hrtimer_start - (re)start an hrtimer on the current CPU
1026 * @timer: the timer to be added
1027 * @tim: expiry time
1028 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or relative (HRTIMER_MODE_REL)
1029 *
1030 * Returns:
1031 * 0 on success
1032 * 1 when the timer was active
1033 */
1034int
1035hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1036{
1037    return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
1038}
1039EXPORT_SYMBOL_GPL(hrtimer_start);
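
/*
 * Illustrative usage sketch: arming a one-shot relative timer against
 * CLOCK_MONOTONIC and tearing it down again. The names demo_timer,
 * demo_cb and the 50ms timeout are invented for the example; error
 * handling is omitted.
 */
#if 0
static struct hrtimer demo_timer;

static enum hrtimer_restart demo_cb(struct hrtimer *timer)
{
	/* Runs in hardirq context with the cpu_base lock dropped */
	return HRTIMER_NORESTART;
}

static void demo_arm(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_cb;
	hrtimer_start(&demo_timer, ktime_set(0, 50 * NSEC_PER_MSEC),
		      HRTIMER_MODE_REL);
}

static void demo_teardown(void)
{
	/* Waits for a running callback to finish before returning */
	hrtimer_cancel(&demo_timer);
}
#endif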
1040
1041
1042/**
1043 * hrtimer_try_to_cancel - try to deactivate a timer
1044 * @timer: hrtimer to stop
1045 *
1046 * Returns:
1047 * 0 when the timer was not active
1048 * 1 when the timer was active
1049 * -1 when the timer is currently executing the callback function and
1050 * cannot be stopped
1051 */
1052int hrtimer_try_to_cancel(struct hrtimer *timer)
1053{
1054    struct hrtimer_clock_base *base;
1055    unsigned long flags;
1056    int ret = -1;
1057
1058    base = lock_hrtimer_base(timer, &flags);
1059
1060    if (!hrtimer_callback_running(timer))
1061        ret = remove_hrtimer(timer, base);
1062
1063    unlock_hrtimer_base(timer, &flags);
1064
1065    return ret;
1066
1067}
1068EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
1069
1070/**
1071 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
1072 * @timer: the timer to be cancelled
1073 *
1074 * Returns:
1075 * 0 when the timer was not active
1076 * 1 when the timer was active
1077 */
1078int hrtimer_cancel(struct hrtimer *timer)
1079{
1080    for (;;) {
1081        int ret = hrtimer_try_to_cancel(timer);
1082
1083        if (ret >= 0)
1084            return ret;
1085        cpu_relax();
1086    }
1087}
1088EXPORT_SYMBOL_GPL(hrtimer_cancel);
1089
1090/**
1091 * hrtimer_get_remaining - get remaining time for the timer
1092 * @timer: the timer to read
1093 */
1094ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
1095{
1096    unsigned long flags;
1097    ktime_t rem;
1098
1099    lock_hrtimer_base(timer, &flags);
1100    rem = hrtimer_expires_remaining(timer);
1101    unlock_hrtimer_base(timer, &flags);
1102
1103    return rem;
1104}
1105EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
1106
1107#ifdef CONFIG_NO_HZ
1108/**
1109 * hrtimer_get_next_event - get the time until next expiry event
1110 *
1111 * Returns the delta to the next expiry event or KTIME_MAX if no timer
1112 * is pending.
1113 */
1114ktime_t hrtimer_get_next_event(void)
1115{
1116    struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1117    struct hrtimer_clock_base *base = cpu_base->clock_base;
1118    ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
1119    unsigned long flags;
1120    int i;
1121
1122    raw_spin_lock_irqsave(&cpu_base->lock, flags);
1123
1124    if (!hrtimer_hres_active()) {
1125        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
1126            struct hrtimer *timer;
1127            struct timerqueue_node *next;
1128
1129            next = timerqueue_getnext(&base->active);
1130            if (!next)
1131                continue;
1132
1133            timer = container_of(next, struct hrtimer, node);
1134            delta.tv64 = hrtimer_get_expires_tv64(timer);
1135            delta = ktime_sub(delta, base->get_time());
1136            if (delta.tv64 < mindelta.tv64)
1137                mindelta.tv64 = delta.tv64;
1138        }
1139    }
1140
1141    raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1142
1143    if (mindelta.tv64 < 0)
1144        mindelta.tv64 = 0;
1145    return mindelta;
1146}
1147#endif
1148
1149static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1150               enum hrtimer_mode mode)
1151{
1152    struct hrtimer_cpu_base *cpu_base;
1153    int base;
1154
1155    memset(timer, 0, sizeof(struct hrtimer));
1156
1157    cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1158
1159    if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
1160        clock_id = CLOCK_MONOTONIC;
1161
1162    base = hrtimer_clockid_to_base(clock_id);
1163    timer->base = &cpu_base->clock_base[base];
1164    timerqueue_init(&timer->node);
1165
1166#ifdef CONFIG_TIMER_STATS
1167    timer->start_site = NULL;
1168    timer->start_pid = -1;
1169    memset(timer->start_comm, 0, TASK_COMM_LEN);
1170#endif
1171}
1172
1173/**
1174 * hrtimer_init - initialize a timer to the given clock
1175 * @timer: the timer to be initialized
1176 * @clock_id: the clock to be used
1177 * @mode: timer mode abs/rel
1178 */
1179void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1180          enum hrtimer_mode mode)
1181{
1182    debug_init(timer, clock_id, mode);
1183    __hrtimer_init(timer, clock_id, mode);
1184}
1185EXPORT_SYMBOL_GPL(hrtimer_init);
1186
1187/**
1188 * hrtimer_get_res - get the timer resolution for a clock
1189 * @which_clock: which clock to query
1190 * @tp: pointer to timespec variable to store the resolution
1191 *
1192 * Store the resolution of the clock selected by @which_clock in the
1193 * variable pointed to by @tp.
1194 */
1195int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
1196{
1197    struct hrtimer_cpu_base *cpu_base;
1198    int base = hrtimer_clockid_to_base(which_clock);
1199
1200    cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1201    *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
1202
1203    return 0;
1204}
1205EXPORT_SYMBOL_GPL(hrtimer_get_res);
1206
1207static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
1208{
1209    struct hrtimer_clock_base *base = timer->base;
1210    struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1211    enum hrtimer_restart (*fn)(struct hrtimer *);
1212    int restart;
1213
1214    WARN_ON(!irqs_disabled());
1215
1216    debug_deactivate(timer);
1217    __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
1218    timer_stats_account_hrtimer(timer);
1219    fn = timer->function;
1220
1221    /*
1222     * Because we run timers from hardirq context, there is no chance
1223     * they get migrated to another CPU, therefore it's safe to unlock
1224     * the timer base.
1225     */
1226    raw_spin_unlock(&cpu_base->lock);
1227    trace_hrtimer_expire_entry(timer, now);
1228    restart = fn(timer);
1229    trace_hrtimer_expire_exit(timer);
1230    raw_spin_lock(&cpu_base->lock);
1231
1232    /*
1233     * Note: We clear the CALLBACK bit after enqueue_hrtimer and
1234     * we do not reprogram the event hardware. Happens either in
1235     * hrtimer_start_range_ns() or in hrtimer_interrupt()
1236     */
1237    if (restart != HRTIMER_NORESTART) {
1238        BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
1239        enqueue_hrtimer(timer, base);
1240    }
1241
1242    WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
1243
1244    timer->state &= ~HRTIMER_STATE_CALLBACK;
1245}
1246
1247#ifdef CONFIG_HIGH_RES_TIMERS
1248
1249/*
1250 * High resolution timer interrupt
1251 * Called with interrupts disabled
1252 */
1253void hrtimer_interrupt(struct clock_event_device *dev)
1254{
1255    struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1256    ktime_t expires_next, now, entry_time, delta;
1257    int i, retries = 0;
1258
1259    BUG_ON(!cpu_base->hres_active);
1260    cpu_base->nr_events++;
1261    dev->next_event.tv64 = KTIME_MAX;
1262
1263    raw_spin_lock(&cpu_base->lock);
1264    entry_time = now = hrtimer_update_base(cpu_base);
1265retry:
1266    expires_next.tv64 = KTIME_MAX;
1267    /*
1268     * We set expires_next to KTIME_MAX here with cpu_base->lock
1269     * held to prevent that a timer is enqueued in our queue via
1270     * the migration code. This does not affect enqueueing of
1271     * timers which run their callback and need to be requeued on
1272     * this CPU.
1273     */
1274    cpu_base->expires_next.tv64 = KTIME_MAX;
1275
1276    for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1277        struct hrtimer_clock_base *base;
1278        struct timerqueue_node *node;
1279        ktime_t basenow;
1280
1281        if (!(cpu_base->active_bases & (1 << i)))
1282            continue;
1283
1284        base = cpu_base->clock_base + i;
1285        basenow = ktime_add(now, base->offset);
1286
1287        while ((node = timerqueue_getnext(&base->active))) {
1288            struct hrtimer *timer;
1289
1290            timer = container_of(node, struct hrtimer, node);
1291
1292            /*
1293             * The immediate goal for using the softexpires is
1294             * minimizing wakeups, not running timers at the
1295             * earliest interrupt after their soft expiration.
1296             * This allows us to avoid using a Priority Search
1297             * Tree, which can answer a stabbing query for
1298             * overlapping intervals and instead use the simple
1299             * BST we already have.
1300             * We don't add extra wakeups by delaying timers that
1301             * are right-of a not yet expired timer, because that
1302             * timer will have to trigger a wakeup anyway.
1303             */
1304
1305            if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
1306                ktime_t expires;
1307
1308                expires = ktime_sub(hrtimer_get_expires(timer),
1309                            base->offset);
1310                if (expires.tv64 < expires_next.tv64)
1311                    expires_next = expires;
1312                break;
1313            }
1314
1315            __run_hrtimer(timer, &basenow);
1316        }
1317    }
1318
1319    /*
1320     * Store the new expiry value so the migration code can verify
1321     * against it.
1322     */
1323    cpu_base->expires_next = expires_next;
1324    raw_spin_unlock(&cpu_base->lock);
1325
1326    /* Reprogramming necessary ? */
1327    if (expires_next.tv64 == KTIME_MAX ||
1328        !tick_program_event(expires_next, 0)) {
1329        cpu_base->hang_detected = 0;
1330        return;
1331    }
1332
1333    /*
1334     * The next timer was already expired due to:
1335     * - tracing
1336     * - long lasting callbacks
1337     * - being scheduled away when running in a VM
1338     *
1339     * We need to prevent that we loop forever in the hrtimer
1340     * interrupt routine. We give it 3 attempts to avoid
1341     * overreacting on some spurious event.
1342     *
1343     * Acquire base lock for updating the offsets and retrieving
1344     * the current time.
1345     */
1346    raw_spin_lock(&cpu_base->lock);
1347    now = hrtimer_update_base(cpu_base);
1348    cpu_base->nr_retries++;
1349    if (++retries < 3)
1350        goto retry;
1351    /*
1352     * Give the system a chance to do something else than looping
1353     * here. We stored the entry time, so we know exactly how long
1354     * we spent here. We schedule the next event this amount of
1355     * time away.
1356     */
1357    cpu_base->nr_hangs++;
1358    cpu_base->hang_detected = 1;
1359    raw_spin_unlock(&cpu_base->lock);
1360    delta = ktime_sub(now, entry_time);
1361    if (delta.tv64 > cpu_base->max_hang_time.tv64)
1362        cpu_base->max_hang_time = delta;
1363    /*
1364     * Limit it to a sensible value as we enforce a longer
1365     * delay. Give the CPU at least 100ms to catch up.
1366     */
1367    if (delta.tv64 > 100 * NSEC_PER_MSEC)
1368        expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
1369    else
1370        expires_next = ktime_add(now, delta);
1371    tick_program_event(expires_next, 1);
1372    printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
1373            ktime_to_ns(delta));
1374}
1375
1376/*
1377 * local version of hrtimer_peek_ahead_timers() called with interrupts
1378 * disabled.
1379 */
1380static void __hrtimer_peek_ahead_timers(void)
1381{
1382    struct tick_device *td;
1383
1384    if (!hrtimer_hres_active())
1385        return;
1386
1387    td = &__get_cpu_var(tick_cpu_device);
1388    if (td && td->evtdev)
1389        hrtimer_interrupt(td->evtdev);
1390}
1391
1392/**
1393 * hrtimer_peek_ahead_timers -- run soft-expired timers now
1394 *
1395 * hrtimer_peek_ahead_timers will peek at the timer queue of
1396 * the current cpu and check if there are any timers for which
1397 * the soft expires time has passed. If any such timers exist,
1398 * they are run immediately and then removed from the timer queue.
1399 *
1400 */
1401void hrtimer_peek_ahead_timers(void)
1402{
1403    unsigned long flags;
1404
1405    local_irq_save(flags);
1406    __hrtimer_peek_ahead_timers();
1407    local_irq_restore(flags);
1408}
1409
1410static void run_hrtimer_softirq(struct softirq_action *h)
1411{
1412    struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1413
1414    if (cpu_base->clock_was_set) {
1415        cpu_base->clock_was_set = 0;
1416        clock_was_set();
1417    }
1418
1419    hrtimer_peek_ahead_timers();
1420}
1421
1422#else /* CONFIG_HIGH_RES_TIMERS */
1423
1424static inline void __hrtimer_peek_ahead_timers(void) { }
1425
1426#endif /* !CONFIG_HIGH_RES_TIMERS */
1427
1428/*
1429 * Called from timer softirq every jiffy, expire hrtimers:
1430 *
1431 * For HRT it's the fallback code to run the softirq in the timer
1432 * softirq context in case the hrtimer initialization failed or has
1433 * not been done yet.
1434 */
1435void hrtimer_run_pending(void)
1436{
1437    if (hrtimer_hres_active())
1438        return;
1439
1440    /*
1441     * This _is_ ugly: We have to check in the softirq context,
1442     * whether we can switch to highres and / or nohz mode. The
1443     * clocksource switch happens in the timer interrupt with
1444     * xtime_lock held. Notification from there only sets the
1445     * check bit in the tick_oneshot code, otherwise we might
1446     * deadlock vs. xtime_lock.
1447     */
1448    if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
1449        hrtimer_switch_to_hres();
1450}
1451
1452/*
1453 * Called from hardirq context every jiffy
1454 */
1455void hrtimer_run_queues(void)
1456{
1457    struct timerqueue_node *node;
1458    struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1459    struct hrtimer_clock_base *base;
1460    int index, gettime = 1;
1461
1462    if (hrtimer_hres_active())
1463        return;
1464
1465    for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
1466        base = &cpu_base->clock_base[index];
1467        if (!timerqueue_getnext(&base->active))
1468            continue;
1469
1470        if (gettime) {
1471            hrtimer_get_softirq_time(cpu_base);
1472            gettime = 0;
1473        }
1474
1475        raw_spin_lock(&cpu_base->lock);
1476
1477        while ((node = timerqueue_getnext(&base->active))) {
1478            struct hrtimer *timer;
1479
1480            timer = container_of(node, struct hrtimer, node);
1481            if (base->softirq_time.tv64 <=
1482                    hrtimer_get_expires_tv64(timer))
1483                break;
1484
1485            __run_hrtimer(timer, &base->softirq_time);
1486        }
1487        raw_spin_unlock(&cpu_base->lock);
1488    }
1489}
1490
1491/*
1492 * Sleep related functions:
1493 */
1494static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1495{
1496    struct hrtimer_sleeper *t =
1497        container_of(timer, struct hrtimer_sleeper, timer);
1498    struct task_struct *task = t->task;
1499
1500    t->task = NULL;
1501    if (task)
1502        wake_up_process(task);
1503
1504    return HRTIMER_NORESTART;
1505}
1506
1507void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
1508{
1509    sl->timer.function = hrtimer_wakeup;
1510    sl->task = task;
1511}
1512EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
1513
1514static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
1515{
1516    hrtimer_init_sleeper(t, current);
1517
1518    do {
1519        set_current_state(TASK_INTERRUPTIBLE);
1520        hrtimer_start_expires(&t->timer, mode);
1521        if (!hrtimer_active(&t->timer))
1522            t->task = NULL;
1523
1524        if (likely(t->task))
1525            schedule();
1526
1527        hrtimer_cancel(&t->timer);
1528        mode = HRTIMER_MODE_ABS;
1529
1530    } while (t->task && !signal_pending(current));
1531
1532    __set_current_state(TASK_RUNNING);
1533
1534    return t->task == NULL;
1535}
1536
1537static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
1538{
1539    struct timespec rmt;
1540    ktime_t rem;
1541
1542    rem = hrtimer_expires_remaining(timer);
1543    if (rem.tv64 <= 0)
1544        return 0;
1545    rmt = ktime_to_timespec(rem);
1546
1547    if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
1548        return -EFAULT;
1549
1550    return 1;
1551}
1552
1553long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1554{
1555    struct hrtimer_sleeper t;
1556    struct timespec __user *rmtp;
1557    int ret = 0;
1558
1559    hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
1560                HRTIMER_MODE_ABS);
1561    hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
1562
1563    if (do_nanosleep(&t, HRTIMER_MODE_ABS))
1564        goto out;
1565
1566    rmtp = restart->nanosleep.rmtp;
1567    if (rmtp) {
1568        ret = update_rmtp(&t.timer, rmtp);
1569        if (ret <= 0)
1570            goto out;
1571    }
1572
1573    /* The other values in restart are already filled in */
1574    ret = -ERESTART_RESTARTBLOCK;
1575out:
1576    destroy_hrtimer_on_stack(&t.timer);
1577    return ret;
1578}
1579
1580long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1581               const enum hrtimer_mode mode, const clockid_t clockid)
1582{
1583    struct restart_block *restart;
1584    struct hrtimer_sleeper t;
1585    int ret = 0;
1586    unsigned long slack;
1587
1588    slack = current->timer_slack_ns;
1589    if (rt_task(current))
1590        slack = 0;
1591
1592    hrtimer_init_on_stack(&t.timer, clockid, mode);
1593    hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
1594    if (do_nanosleep(&t, mode))
1595        goto out;
1596
1597    /* Absolute timers do not update the rmtp value and restart: */
1598    if (mode == HRTIMER_MODE_ABS) {
1599        ret = -ERESTARTNOHAND;
1600        goto out;
1601    }
1602
1603    if (rmtp) {
1604        ret = update_rmtp(&t.timer, rmtp);
1605        if (ret <= 0)
1606            goto out;
1607    }
1608
1609    restart = &current_thread_info()->restart_block;
1610    restart->fn = hrtimer_nanosleep_restart;
1611    restart->nanosleep.clockid = t.timer.base->clockid;
1612    restart->nanosleep.rmtp = rmtp;
1613    restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
1614
1615    ret = -ERESTART_RESTARTBLOCK;
1616out:
1617    destroy_hrtimer_on_stack(&t.timer);
1618    return ret;
1619}
1620
1621SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
1622        struct timespec __user *, rmtp)
1623{
1624    struct timespec tu;
1625
1626    if (copy_from_user(&tu, rqtp, sizeof(tu)))
1627        return -EFAULT;
1628
1629    if (!timespec_valid(&tu))
1630        return -EINVAL;
1631
1632    return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
1633}
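
/*
 * Illustrative usage sketch: from user space this syscall is normally
 * reached through the C library, e.g.:
 *
 *	struct timespec req = { .tv_sec = 0, .tv_nsec = 500000 };
 *	nanosleep(&req, NULL);
 *
 * which ends up in hrtimer_nanosleep() above with CLOCK_MONOTONIC and a
 * relative expiry. The 500us value is arbitrary.
 */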
1634
1635/*
1636 * Functions related to boot-time initialization:
1637 */
1638static void __cpuinit init_hrtimers_cpu(int cpu)
1639{
1640    struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
1641    int i;
1642
1643    raw_spin_lock_init(&cpu_base->lock);
1644
1645    for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1646        cpu_base->clock_base[i].cpu_base = cpu_base;
1647        timerqueue_init_head(&cpu_base->clock_base[i].active);
1648    }
1649
1650    hrtimer_init_hres(cpu_base);
1651}
1652
1653#ifdef CONFIG_HOTPLUG_CPU
1654
1655static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
1656                struct hrtimer_clock_base *new_base)
1657{
1658    struct hrtimer *timer;
1659    struct timerqueue_node *node;
1660
1661    while ((node = timerqueue_getnext(&old_base->active))) {
1662        timer = container_of(node, struct hrtimer, node);
1663        BUG_ON(hrtimer_callback_running(timer));
1664        debug_deactivate(timer);
1665
1666        /*
1667         * Mark it as STATE_MIGRATE, not INACTIVE, otherwise the
1668         * timer could be seen as !active and just vanish away
1669         * under us on another CPU
1670         */
1671        __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
1672        timer->base = new_base;
1673        /*
1674         * Enqueue the timers on the new cpu. This does not
1675         * reprogram the event device in case the timer
1676         * expires before the earliest on this CPU, but we run
1677         * hrtimer_interrupt after we migrated everything to
1678         * sort out already expired timers and reprogram the
1679         * event device.
1680         */
1681        enqueue_hrtimer(timer, new_base);
1682
1683        /* Clear the migration state bit */
1684        timer->state &= ~HRTIMER_STATE_MIGRATE;
1685    }
1686}
1687
1688static void migrate_hrtimers(int scpu)
1689{
1690    struct hrtimer_cpu_base *old_base, *new_base;
1691    int i;
1692
1693    BUG_ON(cpu_online(scpu));
1694    tick_cancel_sched_timer(scpu);
1695
1696    local_irq_disable();
1697    old_base = &per_cpu(hrtimer_bases, scpu);
1698    new_base = &__get_cpu_var(hrtimer_bases);
1699    /*
1700     * The caller is globally serialized and nobody else
1701     * takes two locks at once, so deadlock is not possible.
1702     */
1703    raw_spin_lock(&new_base->lock);
1704    raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1705
1706    for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1707        migrate_hrtimer_list(&old_base->clock_base[i],
1708                     &new_base->clock_base[i]);
1709    }
1710
1711    raw_spin_unlock(&old_base->lock);
1712    raw_spin_unlock(&new_base->lock);
1713
1714    /* Check if we got expired work to do */
1715    __hrtimer_peek_ahead_timers();
1716    local_irq_enable();
1717}
1718
1719#endif /* CONFIG_HOTPLUG_CPU */
1720
1721static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
1722                    unsigned long action, void *hcpu)
1723{
1724    int scpu = (long)hcpu;
1725
1726    switch (action) {
1727
1728    case CPU_UP_PREPARE:
1729    case CPU_UP_PREPARE_FROZEN:
1730        init_hrtimers_cpu(scpu);
1731        break;
1732
1733#ifdef CONFIG_HOTPLUG_CPU
1734    case CPU_DYING:
1735    case CPU_DYING_FROZEN:
1736        clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
1737        break;
1738    case CPU_DEAD:
1739    case CPU_DEAD_FROZEN:
1740    {
1741        clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
1742        migrate_hrtimers(scpu);
1743        break;
1744    }
1745#endif
1746
1747    default:
1748        break;
1749    }
1750
1751    return NOTIFY_OK;
1752}
1753
1754static struct notifier_block __cpuinitdata hrtimers_nb = {
1755    .notifier_call = hrtimer_cpu_notify,
1756};
1757
1758void __init hrtimers_init(void)
1759{
1760    hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
1761              (void *)(long)smp_processor_id());
1762    register_cpu_notifier(&hrtimers_nb);
1763#ifdef CONFIG_HIGH_RES_TIMERS
1764    open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
1765#endif
1766}
1767
1768/**
1769 * schedule_hrtimeout_range_clock - sleep until timeout
1770 * @expires: timeout value (ktime_t)
1771 * @delta: slack in expires timeout (ktime_t)
1772 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1773 * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
1774 */
1775int __sched
1776schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
1777                   const enum hrtimer_mode mode, int clock)
1778{
1779    struct hrtimer_sleeper t;
1780
1781    /*
1782     * Optimize when a zero timeout value is given. It does not
1783     * matter whether this is an absolute or a relative time.
1784     */
1785    if (expires && !expires->tv64) {
1786        __set_current_state(TASK_RUNNING);
1787        return 0;
1788    }
1789
1790    /*
1791     * A NULL parameter means "infinite"
1792     */
1793    if (!expires) {
1794        schedule();
1795        __set_current_state(TASK_RUNNING);
1796        return -EINTR;
1797    }
1798
1799    hrtimer_init_on_stack(&t.timer, clock, mode);
1800    hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
1801
1802    hrtimer_init_sleeper(&t, current);
1803
1804    hrtimer_start_expires(&t.timer, mode);
1805    if (!hrtimer_active(&t.timer))
1806        t.task = NULL;
1807
1808    if (likely(t.task))
1809        schedule();
1810
1811    hrtimer_cancel(&t.timer);
1812    destroy_hrtimer_on_stack(&t.timer);
1813
1814    __set_current_state(TASK_RUNNING);
1815
1816    return !t.task ? 0 : -EINTR;
1817}
1818
1819/**
1820 * schedule_hrtimeout_range - sleep until timeout
1821 * @expires: timeout value (ktime_t)
1822 * @delta: slack in expires timeout (ktime_t)
1823 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1824 *
1825 * Make the current task sleep until the given expiry time has
1826 * elapsed. The routine will return immediately unless
1827 * the current task state has been set (see set_current_state()).
1828 *
1829 * The @delta argument gives the kernel the freedom to schedule the
1830 * actual wakeup to a time that is both power and performance friendly.
1831 * The kernel gives the normal best effort behavior for "@expires+@delta",
1832 * but may decide to fire the timer earlier, though no earlier than @expires.
1833 *
1834 * You can set the task state as follows -
1835 *
1836 * %TASK_UNINTERRUPTIBLE - at least the requested time is guaranteed to
1837 * pass before the routine returns.
1838 *
1839 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1840 * delivered to the current task.
1841 *
1842 * The current task state is guaranteed to be TASK_RUNNING when this
1843 * routine returns.
1844 *
1845 * Returns 0 when the timer has expired otherwise -EINTR
1846 */
1847int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1848                     const enum hrtimer_mode mode)
1849{
1850    return schedule_hrtimeout_range_clock(expires, delta, mode,
1851                          CLOCK_MONOTONIC);
1852}
1853EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
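
/*
 * Illustrative usage sketch: sleeping for roughly 10ms while giving the
 * kernel 1ms of slack to coalesce the wakeup. The caller must set the
 * task state first, as described above; the function name and the
 * numbers are invented for the example.
 */
#if 0
static int demo_sleep_with_slack(void)
{
	ktime_t timeout = ktime_set(0, 10 * NSEC_PER_MSEC);

	set_current_state(TASK_INTERRUPTIBLE);
	/* 0 when the timer expired, -EINTR if a signal woke us early */
	return schedule_hrtimeout_range(&timeout, NSEC_PER_MSEC,
					HRTIMER_MODE_REL);
}
#endif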
1854
1855/**
1856 * schedule_hrtimeout - sleep until timeout
1857 * @expires: timeout value (ktime_t)
1858 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1859 *
1860 * Make the current task sleep until the given expiry time has
1861 * elapsed. The routine will return immediately unless
1862 * the current task state has been set (see set_current_state()).
1863 *
1864 * You can set the task state as follows -
1865 *
1866 * %TASK_UNINTERRUPTIBLE - at least the requested time is guaranteed to
1867 * pass before the routine returns.
1868 *
1869 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1870 * delivered to the current task.
1871 *
1872 * The current task state is guaranteed to be TASK_RUNNING when this
1873 * routine returns.
1874 *
1875 * Returns 0 when the timer has expired otherwise -EINTR
1876 */
1877int __sched schedule_hrtimeout(ktime_t *expires,
1878                   const enum hrtimer_mode mode)
1879{
1880    return schedule_hrtimeout_range(expires, 0, mode);
1881}
1882EXPORT_SYMBOL_GPL(schedule_hrtimeout);
1883
