Root/kernel/timer.c

/*
 * linux/kernel/timer.c
 *
 * Kernel internal timers, basic process system calls
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
 *
 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
 * "A Kernel Model for Precision Timekeeping" by Dave Mills
 * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 * serialize accesses to xtime/lost_ticks).
 * Copyright (C) 1998 Andrea Arcangeli
 * 1999-03-10 Improved NTP compatibility by Ulrich Windl
 * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
 * 2000-10-05 Implemented scalable SMP per-CPU timer handling.
 * Copyright (C) 2000, 2001, 2002 Ingo Molnar
 * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */
21
22#include <linux/kernel_stat.h>
23#include <linux/module.h>
24#include <linux/interrupt.h>
25#include <linux/percpu.h>
26#include <linux/init.h>
27#include <linux/mm.h>
28#include <linux/swap.h>
29#include <linux/pid_namespace.h>
30#include <linux/notifier.h>
31#include <linux/thread_info.h>
32#include <linux/time.h>
33#include <linux/jiffies.h>
34#include <linux/posix-timers.h>
35#include <linux/cpu.h>
36#include <linux/syscalls.h>
37#include <linux/delay.h>
38#include <linux/tick.h>
39#include <linux/kallsyms.h>
40#include <linux/perf_event.h>
41#include <linux/sched.h>
42#include <linux/slab.h>
43
44#include <asm/uaccess.h>
45#include <asm/unistd.h>
46#include <asm/div64.h>
47#include <asm/timex.h>
48#include <asm/io.h>
49
50#define CREATE_TRACE_POINTS
51#include <trace/events/timer.h>
52
53u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
54
55EXPORT_SYMBOL(jiffies_64);
56
/*
 * per-CPU timer vector definitions:
 *
 * TVR_BITS is the number of index bits of the root vector (tv1) and
 * TVN_BITS the number of index bits of each further vector (tv2..tv5).
 * CONFIG_BASE_SMALL selects the smaller sizes. The *_SIZE macros are the
 * resulting slot counts and the *_MASK macros the matching index masks.
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
66
67struct tvec {
68    struct list_head vec[TVN_SIZE];
69};
70
71struct tvec_root {
72    struct list_head vec[TVR_SIZE];
73};
74
75struct tvec_base {
76    spinlock_t lock;
77    struct timer_list *running_timer;
78    unsigned long timer_jiffies;
79    unsigned long next_timer;
80    struct tvec_root tv1;
81    struct tvec tv2;
82    struct tvec tv3;
83    struct tvec tv4;
84    struct tvec tv5;
85} ____cacheline_aligned;
86
87struct tvec_base boot_tvec_bases;
88EXPORT_SYMBOL(boot_tvec_bases);
89static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
90
/*
 * Note that all tvec_bases are 2 byte aligned and lower bit of
 * base in timer_list is guaranteed to be zero. Use the LSB for
 * the new flag to indicate whether the timer is deferrable
 */
#define TBASE_DEFERRABLE_FLAG (0x1)

/* Functions below help us manage 'deferrable' flag */

/*
 * Return the deferrable flag (0 or TBASE_DEFERRABLE_FLAG) stored in the
 * least significant bit of a timer's base pointer.
 */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
    return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}
103
104static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
105{
106    return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
107}
108
109static inline void timer_set_deferrable(struct timer_list *timer)
110{
111    timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
112                       TBASE_DEFERRABLE_FLAG));
113}
114
115static inline void
116timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
117{
118    timer->base = (struct tvec_base *)((unsigned long)(new_base) |
119                      tbase_get_deferrable(timer->base));
120}
121
122static unsigned long round_jiffies_common(unsigned long j, int cpu,
123        bool force_up)
124{
125    int rem;
126    unsigned long original = j;
127
128    /*
129     * We don't want all cpus firing their timers at once hitting the
130     * same lock or cachelines, so we skew each extra cpu with an extra
131     * 3 jiffies. This 3 jiffies came originally from the mm/ code which
132     * already did this.
133     * The skew is done by adding 3*cpunr, then round, then subtract this
134     * extra offset again.
135     */
136    j += cpu * 3;
137
138    rem = j % HZ;
139
140    /*
141     * If the target jiffie is just after a whole second (which can happen
142     * due to delays of the timer irq, long irq off times etc etc) then
143     * we should round down to the whole second, not up. Use 1/4th second
144     * as cutoff for this rounding as an extreme upper bound for this.
145     * But never round down if @force_up is set.
146     */
147    if (rem < HZ/4 && !force_up) /* round down */
148        j = j - rem;
149    else /* round up */
150        j = j - rem + HZ;
151
152    /* now that we have rounded, subtract the extra skew again */
153    j -= cpu * 3;
154
155    if (j <= jiffies) /* rounding ate our timeout entirely; */
156        return original;
157    return j;
158}
159
160/**
161 * __round_jiffies - function to round jiffies to a full second
162 * @j: the time in (absolute) jiffies that should be rounded
163 * @cpu: the processor number on which the timeout will happen
164 *
165 * __round_jiffies() rounds an absolute time in the future (in jiffies)
166 * up or down to (approximately) full seconds. This is useful for timers
167 * for which the exact time they fire does not matter too much, as long as
168 * they fire approximately every X seconds.
169 *
170 * By rounding these timers to whole seconds, all such timers will fire
171 * at the same time, rather than at various times spread out. The goal
172 * of this is to have the CPU wake up less, which saves power.
173 *
174 * The exact rounding is skewed for each processor to avoid all
175 * processors firing at the exact same time, which could lead
176 * to lock contention or spurious cache line bouncing.
177 *
178 * The return value is the rounded version of the @j parameter.
179 */
180unsigned long __round_jiffies(unsigned long j, int cpu)
181{
182    return round_jiffies_common(j, cpu, false);
183}
184EXPORT_SYMBOL_GPL(__round_jiffies);
185
186/**
187 * __round_jiffies_relative - function to round jiffies to a full second
188 * @j: the time in (relative) jiffies that should be rounded
189 * @cpu: the processor number on which the timeout will happen
190 *
191 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
192 * up or down to (approximately) full seconds. This is useful for timers
193 * for which the exact time they fire does not matter too much, as long as
194 * they fire approximately every X seconds.
195 *
196 * By rounding these timers to whole seconds, all such timers will fire
197 * at the same time, rather than at various times spread out. The goal
198 * of this is to have the CPU wake up less, which saves power.
199 *
200 * The exact rounding is skewed for each processor to avoid all
201 * processors firing at the exact same time, which could lead
202 * to lock contention or spurious cache line bouncing.
203 *
204 * The return value is the rounded version of the @j parameter.
205 */
206unsigned long __round_jiffies_relative(unsigned long j, int cpu)
207{
208    unsigned long j0 = jiffies;
209
210    /* Use j0 because jiffies might change while we run */
211    return round_jiffies_common(j + j0, cpu, false) - j0;
212}
213EXPORT_SYMBOL_GPL(__round_jiffies_relative);
214
215/**
216 * round_jiffies - function to round jiffies to a full second
217 * @j: the time in (absolute) jiffies that should be rounded
218 *
219 * round_jiffies() rounds an absolute time in the future (in jiffies)
220 * up or down to (approximately) full seconds. This is useful for timers
221 * for which the exact time they fire does not matter too much, as long as
222 * they fire approximately every X seconds.
223 *
224 * By rounding these timers to whole seconds, all such timers will fire
225 * at the same time, rather than at various times spread out. The goal
226 * of this is to have the CPU wake up less, which saves power.
227 *
228 * The return value is the rounded version of the @j parameter.
229 */
230unsigned long round_jiffies(unsigned long j)
231{
232    return round_jiffies_common(j, raw_smp_processor_id(), false);
233}
234EXPORT_SYMBOL_GPL(round_jiffies);
235
/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The processor used for the skew is raw_smp_processor_id().
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
    return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);
256
257/**
258 * __round_jiffies_up - function to round jiffies up to a full second
259 * @j: the time in (absolute) jiffies that should be rounded
260 * @cpu: the processor number on which the timeout will happen
261 *
262 * This is the same as __round_jiffies() except that it will never
263 * round down. This is useful for timeouts for which the exact time
264 * of firing does not matter too much, as long as they don't fire too
265 * early.
266 */
267unsigned long __round_jiffies_up(unsigned long j, int cpu)
268{
269    return round_jiffies_common(j, cpu, true);
270}
271EXPORT_SYMBOL_GPL(__round_jiffies_up);
272
273/**
274 * __round_jiffies_up_relative - function to round jiffies up to a full second
275 * @j: the time in (relative) jiffies that should be rounded
276 * @cpu: the processor number on which the timeout will happen
277 *
278 * This is the same as __round_jiffies_relative() except that it will never
279 * round down. This is useful for timeouts for which the exact time
280 * of firing does not matter too much, as long as they don't fire too
281 * early.
282 */
283unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
284{
285    unsigned long j0 = jiffies;
286
287    /* Use j0 because jiffies might change while we run */
288    return round_jiffies_common(j + j0, cpu, true) - j0;
289}
290EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
291
292/**
293 * round_jiffies_up - function to round jiffies up to a full second
294 * @j: the time in (absolute) jiffies that should be rounded
295 *
296 * This is the same as round_jiffies() except that it will never
297 * round down. This is useful for timeouts for which the exact time
298 * of firing does not matter too much, as long as they don't fire too
299 * early.
300 */
301unsigned long round_jiffies_up(unsigned long j)
302{
303    return round_jiffies_common(j, raw_smp_processor_id(), true);
304}
305EXPORT_SYMBOL_GPL(round_jiffies_up);
306
/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
    return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
321
322
/*
 * Record @timer as the timer currently being handled on @base.
 * Only stored on CONFIG_SMP builds; a no-op otherwise.
 */
static inline void set_running_timer(struct tvec_base *base,
                    struct timer_list *timer)
{
#ifdef CONFIG_SMP
    base->running_timer = timer;
#endif
}
330
331static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
332{
333    unsigned long expires = timer->expires;
334    unsigned long idx = expires - base->timer_jiffies;
335    struct list_head *vec;
336
337    if (idx < TVR_SIZE) {
338        int i = expires & TVR_MASK;
339        vec = base->tv1.vec + i;
340    } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
341        int i = (expires >> TVR_BITS) & TVN_MASK;
342        vec = base->tv2.vec + i;
343    } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
344        int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
345        vec = base->tv3.vec + i;
346    } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
347        int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
348        vec = base->tv4.vec + i;
349    } else if ((signed long) idx < 0) {
350        /*
351         * Can happen if you add a timer with expires == jiffies,
352         * or you set a timer to go off in the past
353         */
354        vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
355    } else {
356        int i;
357        /* If the timeout is larger than 0xffffffff on 64-bit
358         * architectures then we use the maximum timeout:
359         */
360        if (idx > 0xffffffffUL) {
361            idx = 0xffffffffUL;
362            expires = idx + base->timer_jiffies;
363        }
364        i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
365        vec = base->tv5.vec + i;
366    }
367    /*
368     * Timers are FIFO:
369     */
370    list_add_tail(&timer->entry, vec);
371}
372
#ifdef CONFIG_TIMER_STATS
/*
 * Remember where and by whom @timer was started: the call site @addr and
 * the comm and pid of the current task. Does nothing if a start site is
 * already recorded.
 */
void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
{
    if (timer->start_site)
        return;

    timer->start_site = addr;
    memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
    timer->start_pid = current->pid;
}

/*
 * Feed the recorded start information of @timer into the timer
 * statistics. Timers without a recorded start site are skipped;
 * deferrable timers are flagged as such.
 */
static void timer_stats_account_timer(struct timer_list *timer)
{
    unsigned int flag = 0;

    if (likely(!timer->start_site))
        return;
    if (unlikely(tbase_get_deferrable(timer->base)))
        flag |= TIMER_STATS_FLAG_DEFERRABLE;

    timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
                 timer->function, timer->start_comm, flag);
}

#else
/* Timer statistics disabled: accounting is a no-op. */
static void timer_stats_account_timer(struct timer_list *timer) {}
#endif
400
401#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
402
403static struct debug_obj_descr timer_debug_descr;
404
405/*
406 * fixup_init is called when:
407 * - an active object is initialized
408 */
409static int timer_fixup_init(void *addr, enum debug_obj_state state)
410{
411    struct timer_list *timer = addr;
412
413    switch (state) {
414    case ODEBUG_STATE_ACTIVE:
415        del_timer_sync(timer);
416        debug_object_init(timer, &timer_debug_descr);
417        return 1;
418    default:
419        return 0;
420    }
421}
422
423/*
424 * fixup_activate is called when:
425 * - an active object is activated
426 * - an unknown object is activated (might be a statically initialized object)
427 */
428static int timer_fixup_activate(void *addr, enum debug_obj_state state)
429{
430    struct timer_list *timer = addr;
431
432    switch (state) {
433
434    case ODEBUG_STATE_NOTAVAILABLE:
435        /*
436         * This is not really a fixup. The timer was
437         * statically initialized. We just make sure that it
438         * is tracked in the object tracker.
439         */
440        if (timer->entry.next == NULL &&
441            timer->entry.prev == TIMER_ENTRY_STATIC) {
442            debug_object_init(timer, &timer_debug_descr);
443            debug_object_activate(timer, &timer_debug_descr);
444            return 0;
445        } else {
446            WARN_ON_ONCE(1);
447        }
448        return 0;
449
450    case ODEBUG_STATE_ACTIVE:
451        WARN_ON(1);
452
453    default:
454        return 0;
455    }
456}
457
458/*
459 * fixup_free is called when:
460 * - an active object is freed
461 */
462static int timer_fixup_free(void *addr, enum debug_obj_state state)
463{
464    struct timer_list *timer = addr;
465
466    switch (state) {
467    case ODEBUG_STATE_ACTIVE:
468        del_timer_sync(timer);
469        debug_object_free(timer, &timer_debug_descr);
470        return 1;
471    default:
472        return 0;
473    }
474}
475
476static struct debug_obj_descr timer_debug_descr = {
477    .name = "timer_list",
478    .fixup_init = timer_fixup_init,
479    .fixup_activate = timer_fixup_activate,
480    .fixup_free = timer_fixup_free,
481};
482
483static inline void debug_timer_init(struct timer_list *timer)
484{
485    debug_object_init(timer, &timer_debug_descr);
486}
487
488static inline void debug_timer_activate(struct timer_list *timer)
489{
490    debug_object_activate(timer, &timer_debug_descr);
491}
492
493static inline void debug_timer_deactivate(struct timer_list *timer)
494{
495    debug_object_deactivate(timer, &timer_debug_descr);
496}
497
498static inline void debug_timer_free(struct timer_list *timer)
499{
500    debug_object_free(timer, &timer_debug_descr);
501}
502
503static void __init_timer(struct timer_list *timer,
504             const char *name,
505             struct lock_class_key *key);
506
507void init_timer_on_stack_key(struct timer_list *timer,
508                 const char *name,
509                 struct lock_class_key *key)
510{
511    debug_object_init_on_stack(timer, &timer_debug_descr);
512    __init_timer(timer, name, key);
513}
514EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
515
516void destroy_timer_on_stack(struct timer_list *timer)
517{
518    debug_object_free(timer, &timer_debug_descr);
519}
520EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
521
522#else
523static inline void debug_timer_init(struct timer_list *timer) { }
524static inline void debug_timer_activate(struct timer_list *timer) { }
525static inline void debug_timer_deactivate(struct timer_list *timer) { }
526#endif
527
/* Report timer initialization to the debug hook and the tracepoint. */
static inline void debug_init(struct timer_list *timer)
{
    debug_timer_init(timer);
    trace_timer_init(timer);
}
533
/*
 * Report activation of @timer, with its new @expires value, to the debug
 * hook and the tracepoint.
 */
static inline void
debug_activate(struct timer_list *timer, unsigned long expires)
{
    debug_timer_activate(timer);
    trace_timer_start(timer, expires);
}
540
/* Report deactivation of @timer to the debug hook and the tracepoint. */
static inline void debug_deactivate(struct timer_list *timer)
{
    debug_timer_deactivate(timer);
    trace_timer_cancel(timer);
}
546
547static void __init_timer(struct timer_list *timer,
548             const char *name,
549             struct lock_class_key *key)
550{
551    timer->entry.next = NULL;
552    timer->base = __raw_get_cpu_var(tvec_bases);
553#ifdef CONFIG_TIMER_STATS
554    timer->start_site = NULL;
555    timer->start_pid = -1;
556    memset(timer->start_comm, 0, TASK_COMM_LEN);
557#endif
558    lockdep_init_map(&timer->lockdep_map, name, key, 0);
559}
560
/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior calling *any* of the
 * other timer functions.
 */
void init_timer_key(struct timer_list *timer,
            const char *name,
            struct lock_class_key *key)
{
    debug_init(timer);
    __init_timer(timer, name, key);
}
EXPORT_SYMBOL(init_timer_key);
579
/*
 * Same as init_timer_key(), but additionally marks the timer as
 * deferrable.
 */
void init_timer_deferrable_key(struct timer_list *timer,
                   const char *name,
                   struct lock_class_key *key)
{
    init_timer_key(timer, name, key);
    timer_set_deferrable(timer);
}
EXPORT_SYMBOL(init_timer_deferrable_key);
588
589static inline void detach_timer(struct timer_list *timer,
590                int clear_pending)
591{
592    struct list_head *entry = &timer->entry;
593
594    debug_deactivate(timer);
595
596    __list_del(entry->prev, entry->next);
597    if (clear_pending)
598        entry->next = NULL;
599    entry->prev = LIST_POISON2;
600}
601
602/*
603 * We are using hashed locking: holding per_cpu(tvec_bases).lock
604 * means that all timers which are tied to this base via timer->base are
605 * locked, and the base itself is locked too.
606 *
607 * So __run_timers/migrate_timers can safely modify all timers which could
608 * be found on ->tvX lists.
609 *
610 * When the timer's base is locked, and the timer removed from list, it is
611 * possible to set timer->base = NULL and drop the lock: the timer remains
612 * locked.
613 */
614static struct tvec_base *lock_timer_base(struct timer_list *timer,
615                    unsigned long *flags)
616    __acquires(timer->base->lock)
617{
618    struct tvec_base *base;
619
620    for (;;) {
621        struct tvec_base *prelock_base = timer->base;
622        base = tbase_get_base(prelock_base);
623        if (likely(base != NULL)) {
624            spin_lock_irqsave(&base->lock, *flags);
625            if (likely(prelock_base == timer->base))
626                return base;
627            /* The timer has migrated to another CPU */
628            spin_unlock_irqrestore(&base->lock, *flags);
629        }
630        cpu_relax();
631    }
632}
633
634static inline int
635__mod_timer(struct timer_list *timer, unsigned long expires,
636                        bool pending_only, int pinned)
637{
638    struct tvec_base *base, *new_base;
639    unsigned long flags;
640    int ret = 0 , cpu;
641
642    timer_stats_timer_set_start_info(timer);
643    BUG_ON(!timer->function);
644
645    base = lock_timer_base(timer, &flags);
646
647    if (timer_pending(timer)) {
648        detach_timer(timer, 0);
649        if (timer->expires == base->next_timer &&
650            !tbase_get_deferrable(timer->base))
651            base->next_timer = base->timer_jiffies;
652        ret = 1;
653    } else {
654        if (pending_only)
655            goto out_unlock;
656    }
657
658    debug_activate(timer, expires);
659
660    cpu = smp_processor_id();
661
662#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
663    if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
664        int preferred_cpu = get_nohz_load_balancer();
665
666        if (preferred_cpu >= 0)
667            cpu = preferred_cpu;
668    }
669#endif
670    new_base = per_cpu(tvec_bases, cpu);
671
672    if (base != new_base) {
673        /*
674         * We are trying to schedule the timer on the local CPU.
675         * However we can't change timer's base while it is running,
676         * otherwise del_timer_sync() can't detect that the timer's
677         * handler yet has not finished. This also guarantees that
678         * the timer is serialized wrt itself.
679         */
680        if (likely(base->running_timer != timer)) {
681            /* See the comment in lock_timer_base() */
682            timer_set_base(timer, NULL);
683            spin_unlock(&base->lock);
684            base = new_base;
685            spin_lock(&base->lock);
686            timer_set_base(timer, base);
687        }
688    }
689
690    timer->expires = expires;
691    if (time_before(timer->expires, base->next_timer) &&
692        !tbase_get_deferrable(timer->base))
693        base->next_timer = timer->expires;
694    internal_add_timer(base, timer);
695
696out_unlock:
697    spin_unlock_irqrestore(&base->lock, flags);
698
699    return ret;
700}
701
702/**
703 * mod_timer_pending - modify a pending timer's timeout
704 * @timer: the pending timer to be modified
705 * @expires: new timeout in jiffies
706 *
707 * mod_timer_pending() is the same for pending timers as mod_timer(),
708 * but will not re-activate and modify already deleted timers.
709 *
710 * It is useful for unserialized use of timers.
711 */
712int mod_timer_pending(struct timer_list *timer, unsigned long expires)
713{
714    return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
715}
716EXPORT_SYMBOL(mod_timer_pending);
717
718/**
719 * mod_timer - modify a timer's timeout
720 * @timer: the timer to be modified
721 * @expires: new timeout in jiffies
722 *
723 * mod_timer() is a more efficient way to update the expire field of an
724 * active timer (if the timer is inactive it will be activated)
725 *
726 * mod_timer(timer, expires) is equivalent to:
727 *
728 * del_timer(timer); timer->expires = expires; add_timer(timer);
729 *
730 * Note that if there are multiple unserialized concurrent users of the
731 * same timer, then mod_timer() is the only safe way to modify the timeout,
732 * since add_timer() cannot modify an already running timer.
733 *
734 * The function returns whether it has modified a pending timer or not.
735 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
736 * active timer returns 1.)
737 */
738int mod_timer(struct timer_list *timer, unsigned long expires)
739{
740    /*
741     * This is a common optimization triggered by the
742     * networking code - if the timer is re-modified
743     * to be the same thing then just return:
744     */
745    if (timer_pending(timer) && timer->expires == expires)
746        return 1;
747
748    return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
749}
750EXPORT_SYMBOL(mod_timer);
751
752/**
753 * mod_timer_pinned - modify a timer's timeout
754 * @timer: the timer to be modified
755 * @expires: new timeout in jiffies
756 *
757 * mod_timer_pinned() is a way to update the expire field of an
758 * active timer (if the timer is inactive it will be activated)
759 * and not allow the timer to be migrated to a different CPU.
760 *
761 * mod_timer_pinned(timer, expires) is equivalent to:
762 *
763 * del_timer(timer); timer->expires = expires; add_timer(timer);
764 */
765int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
766{
767    if (timer->expires == expires && timer_pending(timer))
768        return 1;
769
770    return __mod_timer(timer, expires, false, TIMER_PINNED);
771}
772EXPORT_SYMBOL(mod_timer_pinned);
773
774/**
775 * add_timer - start a timer
776 * @timer: the timer to be added
777 *
778 * The kernel will do a ->function(->data) callback from the
779 * timer interrupt at the ->expires point in the future. The
780 * current time is 'jiffies'.
781 *
782 * The timer's ->expires, ->function (and if the handler uses it, ->data)
783 * fields must be set prior calling this function.
784 *
785 * Timers with an ->expires field in the past will be executed in the next
786 * timer tick.
787 */
788void add_timer(struct timer_list *timer)
789{
790    BUG_ON(timer_pending(timer));
791    mod_timer(timer, timer->expires);
792}
793EXPORT_SYMBOL(add_timer);
794
795/**
796 * add_timer_on - start a timer on a particular CPU
797 * @timer: the timer to be added
798 * @cpu: the CPU to start it on
799 *
800 * This is not very scalable on SMP. Double adds are not possible.
801 */
802void add_timer_on(struct timer_list *timer, int cpu)
803{
804    struct tvec_base *base = per_cpu(tvec_bases, cpu);
805    unsigned long flags;
806
807    timer_stats_timer_set_start_info(timer);
808    BUG_ON(timer_pending(timer) || !timer->function);
809    spin_lock_irqsave(&base->lock, flags);
810    timer_set_base(timer, base);
811    debug_activate(timer, timer->expires);
812    if (time_before(timer->expires, base->next_timer) &&
813        !tbase_get_deferrable(timer->base))
814        base->next_timer = timer->expires;
815    internal_add_timer(base, timer);
816    /*
817     * Check whether the other CPU is idle and needs to be
818     * triggered to reevaluate the timer wheel when nohz is
819     * active. We are protected against the other CPU fiddling
820     * with the timer by holding the timer base lock. This also
821     * makes sure that a CPU on the way to idle can not evaluate
822     * the timer wheel.
823     */
824    wake_up_idle_cpu(cpu);
825    spin_unlock_irqrestore(&base->lock, flags);
826}
827EXPORT_SYMBOL_GPL(add_timer_on);
828
829/**
830 * del_timer - deactive a timer.
831 * @timer: the timer to be deactivated
832 *
833 * del_timer() deactivates a timer - this works on both active and inactive
834 * timers.
835 *
836 * The function returns whether it has deactivated a pending timer or not.
837 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
838 * active timer returns 1.)
839 */
840int del_timer(struct timer_list *timer)
841{
842    struct tvec_base *base;
843    unsigned long flags;
844    int ret = 0;
845
846    timer_stats_timer_clear_start_info(timer);
847    if (timer_pending(timer)) {
848        base = lock_timer_base(timer, &flags);
849        if (timer_pending(timer)) {
850            detach_timer(timer, 1);
851            if (timer->expires == base->next_timer &&
852                !tbase_get_deferrable(timer->base))
853                base->next_timer = base->timer_jiffies;
854            ret = 1;
855        }
856        spin_unlock_irqrestore(&base->lock, flags);
857    }
858
859    return ret;
860}
861EXPORT_SYMBOL(del_timer);
862
863#ifdef CONFIG_SMP
864/**
865 * try_to_del_timer_sync - Try to deactivate a timer
866 * @timer: timer do del
867 *
868 * This function tries to deactivate a timer. Upon successful (ret >= 0)
869 * exit the timer is not queued and the handler is not running on any CPU.
870 *
871 * It must not be called from interrupt contexts.
872 */
873int try_to_del_timer_sync(struct timer_list *timer)
874{
875    struct tvec_base *base;
876    unsigned long flags;
877    int ret = -1;
878
879    base = lock_timer_base(timer, &flags);
880
881    if (base->running_timer == timer)
882        goto out;
883
884    timer_stats_timer_clear_start_info(timer);
885    ret = 0;
886    if (timer_pending(timer)) {
887        detach_timer(timer, 1);
888        if (timer->expires == base->next_timer &&
889            !tbase_get_deferrable(timer->base))
890            base->next_timer = base->timer_jiffies;
891        ret = 1;
892    }
893out:
894    spin_unlock_irqrestore(&base->lock, flags);
895
896    return ret;
897}
898EXPORT_SYMBOL(try_to_del_timer_sync);
899
900/**
901 * del_timer_sync - deactivate a timer and wait for the handler to finish.
902 * @timer: the timer to be deactivated
903 *
904 * This function only differs from del_timer() on SMP: besides deactivating
905 * the timer it also makes sure the handler has finished executing on other
906 * CPUs.
907 *
908 * Synchronization rules: Callers must prevent restarting of the timer,
909 * otherwise this function is meaningless. It must not be called from
910 * interrupt contexts. The caller must not hold locks which would prevent
911 * completion of the timer's handler. The timer's handler must not call
912 * add_timer_on(). Upon exit the timer is not queued and the handler is
913 * not running on any CPU.
914 *
915 * The function returns whether it has deactivated a pending timer or not.
916 */
917int del_timer_sync(struct timer_list *timer)
918{
919#ifdef CONFIG_LOCKDEP
920    unsigned long flags;
921
922    local_irq_save(flags);
923    lock_map_acquire(&timer->lockdep_map);
924    lock_map_release(&timer->lockdep_map);
925    local_irq_restore(flags);
926#endif
927
928    for (;;) {
929        int ret = try_to_del_timer_sync(timer);
930        if (ret >= 0)
931            return ret;
932        cpu_relax();
933    }
934}
935EXPORT_SYMBOL(del_timer_sync);
936#endif
937
938static int cascade(struct tvec_base *base, struct tvec *tv, int index)
939{
940    /* cascade all the timers from tv up one level */
941    struct timer_list *timer, *tmp;
942    struct list_head tv_list;
943
944    list_replace_init(tv->vec + index, &tv_list);
945
946    /*
947     * We are removing _all_ timers from the list, so we
948     * don't have to detach them individually.
949     */
950    list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
951        BUG_ON(tbase_get_base(timer->base) != base);
952        internal_add_timer(base, timer);
953    }
954
955    return index;
956}
957
/*
 * Slot index of vector level N (0 for tv2 ... 3 for tv5) that corresponds
 * to the current base->timer_jiffies. Relies on a variable named 'base'
 * being in scope at the point of use.
 */
#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
959
960/**
961 * __run_timers - run all expired timers (if any) on this CPU.
962 * @base: the timer vector to be processed.
963 *
964 * This function cascades all vectors and executes all expired timer
965 * vectors.
966 */
967static inline void __run_timers(struct tvec_base *base)
968{
969    struct timer_list *timer;
970
971    spin_lock_irq(&base->lock);
972    while (time_after_eq(jiffies, base->timer_jiffies)) {
973        struct list_head work_list;
974        struct list_head *head = &work_list;
975        int index = base->timer_jiffies & TVR_MASK;
976
977        /*
978         * Cascade timers:
979         */
980        if (!index &&
981            (!cascade(base, &base->tv2, INDEX(0))) &&
982                (!cascade(base, &base->tv3, INDEX(1))) &&
983                    !cascade(base, &base->tv4, INDEX(2)))
984            cascade(base, &base->tv5, INDEX(3));
985        ++base->timer_jiffies;
986        list_replace_init(base->tv1.vec + index, &work_list);
987        while (!list_empty(head)) {
988            void (*fn)(unsigned long);
989            unsigned long data;
990
991            timer = list_first_entry(head, struct timer_list,entry);
992            fn = timer->function;
993            data = timer->data;
994
995            timer_stats_account_timer(timer);
996
997            set_running_timer(base, timer);
998            detach_timer(timer, 1);
999
1000            spin_unlock_irq(&base->lock);
1001            {
1002                int preempt_count = preempt_count();
1003
1004#ifdef CONFIG_LOCKDEP
1005                /*
1006                 * It is permissible to free the timer from
1007                 * inside the function that is called from
1008                 * it, this we need to take into account for
1009                 * lockdep too. To avoid bogus "held lock
1010                 * freed" warnings as well as problems when
1011                 * looking into timer->lockdep_map, make a
1012                 * copy and use that here.
1013                 */
1014                struct lockdep_map lockdep_map =
1015                    timer->lockdep_map;
1016#endif
1017                /*
1018                 * Couple the lock chain with the lock chain at
1019                 * del_timer_sync() by acquiring the lock_map
1020                 * around the fn() call here and in
1021                 * del_timer_sync().
1022                 */
1023                lock_map_acquire(&lockdep_map);
1024
1025                trace_timer_expire_entry(timer);
1026                fn(data);
1027                trace_timer_expire_exit(timer);
1028
1029                lock_map_release(&lockdep_map);
1030
1031                if (preempt_count != preempt_count()) {
1032                    printk(KERN_ERR "huh, entered %p "
1033                           "with preempt_count %08x, exited"
1034                           " with %08x?\n",
1035                           fn, preempt_count,
1036                           preempt_count());
1037                    BUG();
1038                }
1039            }
1040            spin_lock_irq(&base->lock);
1041        }
1042    }
1043    set_running_timer(base, NULL);
1044    spin_unlock_irq(&base->lock);
1045}
1046
1047#ifdef CONFIG_NO_HZ
1048/*
1049 * Find out when the next timer event is due to happen. This
1050 * is used on S/390 to stop all activity when a CPU is idle.
1051 * This function needs to be called with interrupts disabled.
1052 */
1053static unsigned long __next_timer_interrupt(struct tvec_base *base)
1054{
1055    unsigned long timer_jiffies = base->timer_jiffies;
1056    unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
1057    int index, slot, array, found = 0;
1058    struct timer_list *nte;
1059    struct tvec *varray[4];
1060
1061    /* Look for timer events in tv1. */
1062    index = slot = timer_jiffies & TVR_MASK;
1063    do {
1064        list_for_each_entry(nte, base->tv1.vec + slot, entry) {
1065            if (tbase_get_deferrable(nte->base))
1066                continue;
1067
1068            found = 1;
1069            expires = nte->expires;
1070            /* Look at the cascade bucket(s)? */
1071            if (!index || slot < index)
1072                goto cascade;
1073            return expires;
1074        }
1075        slot = (slot + 1) & TVR_MASK;
1076    } while (slot != index);
1077
1078cascade:
1079    /* Calculate the next cascade event */
1080    if (index)
1081        timer_jiffies += TVR_SIZE - index;
1082    timer_jiffies >>= TVR_BITS;
1083
1084    /* Check tv2-tv5. */
1085    varray[0] = &base->tv2;
1086    varray[1] = &base->tv3;
1087    varray[2] = &base->tv4;
1088    varray[3] = &base->tv5;
1089
1090    for (array = 0; array < 4; array++) {
1091        struct tvec *varp = varray[array];
1092
1093        index = slot = timer_jiffies & TVN_MASK;
1094        do {
1095            list_for_each_entry(nte, varp->vec + slot, entry) {
1096                if (tbase_get_deferrable(nte->base))
1097                    continue;
1098
1099                found = 1;
1100                if (time_before(nte->expires, expires))
1101                    expires = nte->expires;
1102            }
1103            /*
1104             * Do we still search for the first timer or are
1105             * we looking up the cascade buckets ?
1106             */
1107            if (found) {
1108                /* Look at the cascade bucket(s)? */
1109                if (!index || slot < index)
1110                    break;
1111                return expires;
1112            }
1113            slot = (slot + 1) & TVN_MASK;
1114        } while (slot != index);
1115
1116        if (index)
1117            timer_jiffies += TVN_SIZE - index;
1118        timer_jiffies >>= TVN_BITS;
1119    }
1120    return expires;
1121}
1122
1123/*
1124 * Check, if the next hrtimer event is before the next timer wheel
1125 * event:
1126 */
1127static unsigned long cmp_next_hrtimer_event(unsigned long now,
1128                        unsigned long expires)
1129{
1130    ktime_t hr_delta = hrtimer_get_next_event();
1131    struct timespec tsdelta;
1132    unsigned long delta;
1133
1134    if (hr_delta.tv64 == KTIME_MAX)
1135        return expires;
1136
1137    /*
1138     * Expired timer available, let it expire in the next tick
1139     */
1140    if (hr_delta.tv64 <= 0)
1141        return now + 1;
1142
1143    tsdelta = ktime_to_timespec(hr_delta);
1144    delta = timespec_to_jiffies(&tsdelta);
1145
1146    /*
1147     * Limit the delta to the max value, which is checked in
1148     * tick_nohz_stop_sched_tick():
1149     */
1150    if (delta > NEXT_TIMER_MAX_DELTA)
1151        delta = NEXT_TIMER_MAX_DELTA;
1152
1153    /*
1154     * Take rounding errors in to account and make sure, that it
1155     * expires in the next tick. Otherwise we go into an endless
1156     * ping pong due to tick_nohz_stop_sched_tick() retriggering
1157     * the timer softirq
1158     */
1159    if (delta < 1)
1160        delta = 1;
1161    now += delta;
1162    if (time_before(now, expires))
1163        return now;
1164    return expires;
1165}
1166
1167/**
1168 * get_next_timer_interrupt - return the jiffy of the next pending timer
1169 * @now: current time (in jiffies)
1170 */
1171unsigned long get_next_timer_interrupt(unsigned long now)
1172{
1173    struct tvec_base *base = __get_cpu_var(tvec_bases);
1174    unsigned long expires;
1175
1176    spin_lock(&base->lock);
1177    if (time_before_eq(base->next_timer, base->timer_jiffies))
1178        base->next_timer = __next_timer_interrupt(base);
1179    expires = base->next_timer;
1180    spin_unlock(&base->lock);
1181
1182    if (time_before_eq(expires, now))
1183        return now;
1184
1185    return cmp_next_hrtimer_event(now, expires);
1186}
1187#endif
1188
1189/*
1190 * Called from the timer interrupt handler to charge one tick to the current
1191 * process. user_tick is 1 if the tick is user time, 0 for system.
1192 */
1193void update_process_times(int user_tick)
1194{
1195    struct task_struct *p = current;
1196    int cpu = smp_processor_id();
1197
1198    /* Note: this timer irq context must be accounted for as well. */
1199    account_process_tick(p, user_tick);
1200    run_local_timers();
1201    rcu_check_callbacks(cpu, user_tick);
1202    printk_tick();
1203    perf_event_do_pending();
1204    scheduler_tick();
1205    run_posix_cpu_timers(p);
1206}
1207
1208/*
1209 * This function runs timers and the timer-tq in bottom half context.
1210 */
1211static void run_timer_softirq(struct softirq_action *h)
1212{
1213    struct tvec_base *base = __get_cpu_var(tvec_bases);
1214
1215    hrtimer_run_pending();
1216
1217    if (time_after_eq(jiffies, base->timer_jiffies))
1218        __run_timers(base);
1219}
1220
1221/*
1222 * Called by the local, per-CPU timer interrupt on SMP.
1223 */
1224void run_local_timers(void)
1225{
1226    hrtimer_run_queues();
1227    raise_softirq(TIMER_SOFTIRQ);
1228    softlockup_tick();
1229}
1230
1231/*
1232 * The 64-bit jiffies value is not atomic - you MUST NOT read it
1233 * without sampling the sequence number in xtime_lock.
1234 * jiffies is defined in the linker script...
1235 */
1236
1237void do_timer(unsigned long ticks)
1238{
1239    jiffies_64 += ticks;
1240    update_wall_time();
1241    calc_global_load();
1242}
1243
1244#ifdef __ARCH_WANT_SYS_ALARM
1245
1246/*
1247 * For backwards compatibility? This can be done in libc so Alpha
1248 * and all newer ports shouldn't need it.
1249 */
1250SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1251{
1252    return alarm_setitimer(seconds);
1253}
1254
1255#endif
1256
1257#ifndef __alpha__
1258
1259/*
1260 * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
1261 * should be moved into arch/i386 instead?
1262 */
1263
1264/**
1265 * sys_getpid - return the thread group id of the current process
1266 *
1267 * Note, despite the name, this returns the tgid not the pid. The tgid and
1268 * the pid are identical unless CLONE_THREAD was specified on clone() in
1269 * which case the tgid is the same in all threads of the same group.
1270 *
1271 * This is SMP safe as current->tgid does not change.
1272 */
1273SYSCALL_DEFINE0(getpid)
1274{
1275    return task_tgid_vnr(current);
1276}
1277
1278/*
1279 * Accessing ->real_parent is not SMP-safe, it could
1280 * change from under us. However, we can use a stale
1281 * value of ->real_parent under rcu_read_lock(), see
1282 * release_task()->call_rcu(delayed_put_task_struct).
1283 */
1284SYSCALL_DEFINE0(getppid)
1285{
1286    int pid;
1287
1288    rcu_read_lock();
1289    pid = task_tgid_vnr(current->real_parent);
1290    rcu_read_unlock();
1291
1292    return pid;
1293}
1294
/* Return the current task's uid, as given by current_uid(). */
SYSCALL_DEFINE0(getuid)
{
    /* SMP safe: only the task itself changes this value. */
    return current_uid();
}
1300
/* Return the current task's effective uid, as given by current_euid(). */
SYSCALL_DEFINE0(geteuid)
{
    /* SMP safe: only the task itself changes this value. */
    return current_euid();
}
1306
/* Return the current task's gid, as given by current_gid(). */
SYSCALL_DEFINE0(getgid)
{
    /* SMP safe: only the task itself changes this value. */
    return current_gid();
}
1312
/* Return the current task's effective gid, as given by current_egid(). */
SYSCALL_DEFINE0(getegid)
{
    /* SMP safe: only the task itself changes this value. */
    return current_egid();
}
1318
1319#endif
1320
/*
 * Timer callback armed by schedule_timeout(): @__data carries the
 * task_struct pointer of the sleeping task, which is woken up here.
 */
static void process_timeout(unsigned long __data)
{
    struct task_struct *sleeper = (struct task_struct *)__data;

    wake_up_process(sleeper);
}
1325
1326/**
1327 * schedule_timeout - sleep until timeout
1328 * @timeout: timeout value in jiffies
1329 *
1330 * Make the current task sleep until @timeout jiffies have
1331 * elapsed. The routine will return immediately unless
1332 * the current task state has been set (see set_current_state()).
1333 *
1334 * You can set the task state as follows -
1335 *
1336 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
1337 * pass before the routine returns. The routine will return 0
1338 *
1339 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1340 * delivered to the current task. In this case the remaining time
1341 * in jiffies will be returned, or 0 if the timer expired in time
1342 *
1343 * The current task state is guaranteed to be TASK_RUNNING when this
1344 * routine returns.
1345 *
1346 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
1347 * the CPU away without a bound on the timeout. In this case the return
1348 * value will be %MAX_SCHEDULE_TIMEOUT.
1349 *
1350 * In all cases the return value is guaranteed to be non-negative.
1351 */
1352signed long __sched schedule_timeout(signed long timeout)
1353{
1354    struct timer_list timer;
1355    unsigned long expire;
1356
1357    switch (timeout)
1358    {
1359    case MAX_SCHEDULE_TIMEOUT:
1360        /*
1361         * These two special cases are useful to be comfortable
1362         * in the caller. Nothing more. We could take
1363         * MAX_SCHEDULE_TIMEOUT from one of the negative value
1364         * but I' d like to return a valid offset (>=0) to allow
1365         * the caller to do everything it want with the retval.
1366         */
1367        schedule();
1368        goto out;
1369    default:
1370        /*
1371         * Another bit of PARANOID. Note that the retval will be
1372         * 0 since no piece of kernel is supposed to do a check
1373         * for a negative retval of schedule_timeout() (since it
1374         * should never happens anyway). You just have the printk()
1375         * that will tell you if something is gone wrong and where.
1376         */
1377        if (timeout < 0) {
1378            printk(KERN_ERR "schedule_timeout: wrong timeout "
1379                "value %lx\n", timeout);
1380            dump_stack();
1381            current->state = TASK_RUNNING;
1382            goto out;
1383        }
1384    }
1385
1386    expire = timeout + jiffies;
1387
1388    setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
1389    __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
1390    schedule();
1391    del_singleshot_timer_sync(&timer);
1392
1393    /* Remove the timer from the object tracker */
1394    destroy_timer_on_stack(&timer);
1395
1396    timeout = expire - jiffies;
1397
1398 out:
1399    return timeout < 0 ? 0 : timeout;
1400}
1401EXPORT_SYMBOL(schedule_timeout);
1402
1403/*
1404 * We can use __set_current_state() here because schedule_timeout() calls
1405 * schedule() unconditionally.
1406 */
1407signed long __sched schedule_timeout_interruptible(signed long timeout)
1408{
1409    __set_current_state(TASK_INTERRUPTIBLE);
1410    return schedule_timeout(timeout);
1411}
1412EXPORT_SYMBOL(schedule_timeout_interruptible);
1413
1414signed long __sched schedule_timeout_killable(signed long timeout)
1415{
1416    __set_current_state(TASK_KILLABLE);
1417    return schedule_timeout(timeout);
1418}
1419EXPORT_SYMBOL(schedule_timeout_killable);
1420
1421signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1422{
1423    __set_current_state(TASK_UNINTERRUPTIBLE);
1424    return schedule_timeout(timeout);
1425}
1426EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1427
1428/* Thread ID - the internal kernel "pid" */
1429SYSCALL_DEFINE0(gettid)
1430{
1431    return task_pid_vnr(current);
1432}
1433
1434/**
1435 * do_sysinfo - fill in sysinfo struct
1436 * @info: pointer to buffer to fill
1437 */
1438int do_sysinfo(struct sysinfo *info)
1439{
1440    unsigned long mem_total, sav_total;
1441    unsigned int mem_unit, bitcount;
1442    struct timespec tp;
1443
1444    memset(info, 0, sizeof(struct sysinfo));
1445
1446    ktime_get_ts(&tp);
1447    monotonic_to_bootbased(&tp);
1448    info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1449
1450    get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
1451
1452    info->procs = nr_threads;
1453
1454    si_meminfo(info);
1455    si_swapinfo(info);
1456
1457    /*
1458     * If the sum of all the available memory (i.e. ram + swap)
1459     * is less than can be stored in a 32 bit unsigned long then
1460     * we can be binary compatible with 2.2.x kernels. If not,
1461     * well, in that case 2.2.x was broken anyways...
1462     *
1463     * -Erik Andersen <andersee@debian.org>
1464     */
1465
1466    mem_total = info->totalram + info->totalswap;
1467    if (mem_total < info->totalram || mem_total < info->totalswap)
1468        goto out;
1469    bitcount = 0;
1470    mem_unit = info->mem_unit;
1471    while (mem_unit > 1) {
1472        bitcount++;
1473        mem_unit >>= 1;
1474        sav_total = mem_total;
1475        mem_total <<= 1;
1476        if (mem_total < sav_total)
1477            goto out;
1478    }
1479
1480    /*
1481     * If mem_total did not overflow, multiply all memory values by
1482     * info->mem_unit and set it to 1. This leaves things compatible
1483     * with 2.2.x, and also retains compatibility with earlier 2.4.x
1484     * kernels...
1485     */
1486
1487    info->mem_unit = 1;
1488    info->totalram <<= bitcount;
1489    info->freeram <<= bitcount;
1490    info->sharedram <<= bitcount;
1491    info->bufferram <<= bitcount;
1492    info->totalswap <<= bitcount;
1493    info->freeswap <<= bitcount;
1494    info->totalhigh <<= bitcount;
1495    info->freehigh <<= bitcount;
1496
1497out:
1498    return 0;
1499}
1500
1501SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
1502{
1503    struct sysinfo val;
1504
1505    do_sysinfo(&val);
1506
1507    if (copy_to_user(info, &val, sizeof(struct sysinfo)))
1508        return -EFAULT;
1509
1510    return 0;
1511}
1512
1513static int __cpuinit init_timers_cpu(int cpu)
1514{
1515    int j;
1516    struct tvec_base *base;
1517    static char __cpuinitdata tvec_base_done[NR_CPUS];
1518
1519    if (!tvec_base_done[cpu]) {
1520        static char boot_done;
1521
1522        if (boot_done) {
1523            /*
1524             * The APs use this path later in boot
1525             */
1526            base = kmalloc_node(sizeof(*base),
1527                        GFP_KERNEL | __GFP_ZERO,
1528                        cpu_to_node(cpu));
1529            if (!base)
1530                return -ENOMEM;
1531
1532            /* Make sure that tvec_base is 2 byte aligned */
1533            if (tbase_get_deferrable(base)) {
1534                WARN_ON(1);
1535                kfree(base);
1536                return -ENOMEM;
1537            }
1538            per_cpu(tvec_bases, cpu) = base;
1539        } else {
1540            /*
1541             * This is for the boot CPU - we use compile-time
1542             * static initialisation because per-cpu memory isn't
1543             * ready yet and because the memory allocators are not
1544             * initialised either.
1545             */
1546            boot_done = 1;
1547            base = &boot_tvec_bases;
1548        }
1549        tvec_base_done[cpu] = 1;
1550    } else {
1551        base = per_cpu(tvec_bases, cpu);
1552    }
1553
1554    spin_lock_init(&base->lock);
1555
1556    for (j = 0; j < TVN_SIZE; j++) {
1557        INIT_LIST_HEAD(base->tv5.vec + j);
1558        INIT_LIST_HEAD(base->tv4.vec + j);
1559        INIT_LIST_HEAD(base->tv3.vec + j);
1560        INIT_LIST_HEAD(base->tv2.vec + j);
1561    }
1562    for (j = 0; j < TVR_SIZE; j++)
1563        INIT_LIST_HEAD(base->tv1.vec + j);
1564
1565    base->timer_jiffies = jiffies;
1566    base->next_timer = base->timer_jiffies;
1567    return 0;
1568}
1569
1570#ifdef CONFIG_HOTPLUG_CPU
1571static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
1572{
1573    struct timer_list *timer;
1574
1575    while (!list_empty(head)) {
1576        timer = list_first_entry(head, struct timer_list, entry);
1577        detach_timer(timer, 0);
1578        timer_set_base(timer, new_base);
1579        if (time_before(timer->expires, new_base->next_timer) &&
1580            !tbase_get_deferrable(timer->base))
1581            new_base->next_timer = timer->expires;
1582        internal_add_timer(new_base, timer);
1583    }
1584}
1585
1586static void __cpuinit migrate_timers(int cpu)
1587{
1588    struct tvec_base *old_base;
1589    struct tvec_base *new_base;
1590    int i;
1591
1592    BUG_ON(cpu_online(cpu));
1593    old_base = per_cpu(tvec_bases, cpu);
1594    new_base = get_cpu_var(tvec_bases);
1595    /*
1596     * The caller is globally serialized and nobody else
1597     * takes two locks at once, deadlock is not possible.
1598     */
1599    spin_lock_irq(&new_base->lock);
1600    spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1601
1602    BUG_ON(old_base->running_timer);
1603
1604    for (i = 0; i < TVR_SIZE; i++)
1605        migrate_timer_list(new_base, old_base->tv1.vec + i);
1606    for (i = 0; i < TVN_SIZE; i++) {
1607        migrate_timer_list(new_base, old_base->tv2.vec + i);
1608        migrate_timer_list(new_base, old_base->tv3.vec + i);
1609        migrate_timer_list(new_base, old_base->tv4.vec + i);
1610        migrate_timer_list(new_base, old_base->tv5.vec + i);
1611    }
1612
1613    spin_unlock(&old_base->lock);
1614    spin_unlock_irq(&new_base->lock);
1615    put_cpu_var(tvec_bases);
1616}
1617#endif /* CONFIG_HOTPLUG_CPU */
1618
1619static int __cpuinit timer_cpu_notify(struct notifier_block *self,
1620                unsigned long action, void *hcpu)
1621{
1622    long cpu = (long)hcpu;
1623    switch(action) {
1624    case CPU_UP_PREPARE:
1625    case CPU_UP_PREPARE_FROZEN:
1626        if (init_timers_cpu(cpu) < 0)
1627            return NOTIFY_BAD;
1628        break;
1629#ifdef CONFIG_HOTPLUG_CPU
1630    case CPU_DEAD:
1631    case CPU_DEAD_FROZEN:
1632        migrate_timers(cpu);
1633        break;
1634#endif
1635    default:
1636        break;
1637    }
1638    return NOTIFY_OK;
1639}
1640
1641static struct notifier_block __cpuinitdata timers_nb = {
1642    .notifier_call = timer_cpu_notify,
1643};
1644
1645
1646void __init init_timers(void)
1647{
1648    int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
1649                (void *)(long)smp_processor_id());
1650
1651    init_timer_stats();
1652
1653    BUG_ON(err == NOTIFY_BAD);
1654    register_cpu_notifier(&timers_nb);
1655    open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1656}
1657
/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 *
 * Sleeps uninterruptibly for @msecs converted to jiffies plus one extra
 * jiffy, going back to sleep for as long as schedule_timeout() reports
 * time remaining.
 */
void msleep(unsigned int msecs)
{
    unsigned long left;

    for (left = msecs_to_jiffies(msecs) + 1; left; )
        left = schedule_timeout_uninterruptible(left);
}

EXPORT_SYMBOL(msleep);
1671
1672/**
1673 * msleep_interruptible - sleep waiting for signals
1674 * @msecs: Time in milliseconds to sleep for
1675 */
1676unsigned long msleep_interruptible(unsigned int msecs)
1677{
1678    unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1679
1680    while (timeout && !signal_pending(current))
1681        timeout = schedule_timeout_interruptible(timeout);
1682    return jiffies_to_msecs(timeout);
1683}
1684
1685EXPORT_SYMBOL(msleep_interruptible);
1686

Archive Download this file



interactive