Root/kernel/cpu.c

1/* CPU control.
2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 *
4 * This code is licenced under the GPL.
5 */
6#include <linux/proc_fs.h>
7#include <linux/smp.h>
8#include <linux/init.h>
9#include <linux/notifier.h>
10#include <linux/sched.h>
11#include <linux/unistd.h>
12#include <linux/cpu.h>
13#include <linux/oom.h>
14#include <linux/rcupdate.h>
15#include <linux/export.h>
16#include <linux/bug.h>
17#include <linux/kthread.h>
18#include <linux/stop_machine.h>
19#include <linux/mutex.h>
20#include <linux/gfp.h>
21#include <linux/suspend.h>
22
23#include "smpboot.h"
24
25#ifdef CONFIG_SMP
/*
 * Serializes the updates to cpu_online_mask, cpu_present_mask.
 * Acquired by cpu_maps_update_begin() and released by cpu_maps_update_done().
 */
static DEFINE_MUTEX(cpu_add_remove_lock);
28
/*
 * cpu_maps_update_begin() and cpu_maps_update_done() must be used when
 * attempting to serialize the updates to cpu_online_mask, cpu_present_mask.
 *
 * cpu_maps_update_begin - acquire cpu_add_remove_lock.
 */
void cpu_maps_update_begin(void)
{
    mutex_lock(&cpu_add_remove_lock);
}
37
/*
 * cpu_maps_update_done - release cpu_add_remove_lock, the mutex that
 * cpu_maps_update_begin() acquires.
 */
void cpu_maps_update_done(void)
{
    mutex_unlock(&cpu_add_remove_lock);
}
42
/* Notifier chain that __cpu_notify() calls with CPU hotplug events. */
static RAW_NOTIFIER_HEAD(cpu_chain);
44
/*
 * If set, cpu_up() and cpu_down() will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock.
 */
static int cpu_hotplug_disabled;
49
50#ifdef CONFIG_HOTPLUG_CPU
51
52static struct {
53    struct task_struct *active_writer;
54    struct mutex lock; /* Synchronizes accesses to refcount, */
55    /*
56     * Also blocks the new readers during
57     * an ongoing cpu hotplug operation.
58     */
59    int refcount;
60} cpu_hotplug = {
61    .active_writer = NULL,
62    .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
63    .refcount = 0,
64};
65
66void get_online_cpus(void)
67{
68    might_sleep();
69    if (cpu_hotplug.active_writer == current)
70        return;
71    mutex_lock(&cpu_hotplug.lock);
72    cpu_hotplug.refcount++;
73    mutex_unlock(&cpu_hotplug.lock);
74
75}
76EXPORT_SYMBOL_GPL(get_online_cpus);
77
78void put_online_cpus(void)
79{
80    if (cpu_hotplug.active_writer == current)
81        return;
82    mutex_lock(&cpu_hotplug.lock);
83    if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
84        wake_up_process(cpu_hotplug.active_writer);
85    mutex_unlock(&cpu_hotplug.lock);
86
87}
88EXPORT_SYMBOL_GPL(put_online_cpus);
89
90/*
91 * This ensures that the hotplug operation can begin only when the
92 * refcount goes to zero.
93 *
94 * Note that during a cpu-hotplug operation, the new readers, if any,
95 * will be blocked by the cpu_hotplug.lock
96 *
97 * Since cpu_hotplug_begin() is always called after invoking
98 * cpu_maps_update_begin(), we can be sure that only one writer is active.
99 *
100 * Note that theoretically, there is a possibility of a livelock:
101 * - Refcount goes to zero, last reader wakes up the sleeping
102 * writer.
103 * - Last reader unlocks the cpu_hotplug.lock.
104 * - A new reader arrives at this moment, bumps up the refcount.
105 * - The writer acquires the cpu_hotplug.lock finds the refcount
106 * non zero and goes to sleep again.
107 *
108 * However, this is very difficult to achieve in practice since
109 * get_online_cpus() not an api which is called all that often.
110 *
111 */
112static void cpu_hotplug_begin(void)
113{
114    cpu_hotplug.active_writer = current;
115
116    for (;;) {
117        mutex_lock(&cpu_hotplug.lock);
118        if (likely(!cpu_hotplug.refcount))
119            break;
120        __set_current_state(TASK_UNINTERRUPTIBLE);
121        mutex_unlock(&cpu_hotplug.lock);
122        schedule();
123    }
124}
125
/*
 * cpu_hotplug_done - end the hotplug operation started by cpu_hotplug_begin().
 *
 * Clears the active writer and releases cpu_hotplug.lock, which
 * cpu_hotplug_begin() returned with held.
 */
static void cpu_hotplug_done(void)
{
    cpu_hotplug.active_writer = NULL;
    mutex_unlock(&cpu_hotplug.lock);
}
131
132#else /* #if CONFIG_HOTPLUG_CPU */
/* Without CONFIG_HOTPLUG_CPU the writer-side begin/done hooks are no-ops. */
static void cpu_hotplug_begin(void) {}
static void cpu_hotplug_done(void) {}
135#endif /* #else #if CONFIG_HOTPLUG_CPU */
136
137/* Need to know about CPUs going up/down? */
138int __ref register_cpu_notifier(struct notifier_block *nb)
139{
140    int ret;
141    cpu_maps_update_begin();
142    ret = raw_notifier_chain_register(&cpu_chain, nb);
143    cpu_maps_update_done();
144    return ret;
145}
146
147static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
148            int *nr_calls)
149{
150    int ret;
151
152    ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
153                    nr_calls);
154
155    return notifier_to_errno(ret);
156}
157
/*
 * cpu_notify - deliver event @val with argument @v to the CPU notifier chain.
 *
 * Convenience wrapper around __cpu_notify() that passes nr_to_call = -1 and
 * no nr_calls counter.
 * NOTE(review): -1 presumably means "no limit on callbacks" - confirm
 * against __raw_notifier_call_chain().
 *
 * Returns the errno-style result of __cpu_notify().
 */
static int cpu_notify(unsigned long val, void *v)
{
    return __cpu_notify(val, v, -1, NULL);
}
162
163#ifdef CONFIG_HOTPLUG_CPU
164
/*
 * cpu_notify_nofail - deliver a notification that is not allowed to fail.
 *
 * Any non-zero result from cpu_notify() triggers BUG_ON().
 */
static void cpu_notify_nofail(unsigned long val, void *v)
{
    int err = cpu_notify(val, v);

    BUG_ON(err);
}
/*
 * This export sits inside the CONFIG_HOTPLUG_CPU section, so
 * register_cpu_notifier() is only exported when CPU hotplug is configured.
 */
EXPORT_SYMBOL(register_cpu_notifier);
170
/*
 * unregister_cpu_notifier - remove @nb from the CPU notifier chain.
 * @nb: notifier block to unregister
 *
 * The chain is modified with cpu_add_remove_lock held.  The return value
 * of raw_notifier_chain_unregister() is not propagated.
 */
void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
    cpu_maps_update_begin();
    raw_notifier_chain_unregister(&cpu_chain, nb);
    cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
178
179/**
180 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
181 * @cpu: a CPU id
182 *
183 * This function walks all processes, finds a valid mm struct for each one and
184 * then clears a corresponding bit in mm's cpumask. While this all sounds
185 * trivial, there are various non-obvious corner cases, which this function
186 * tries to solve in a safe manner.
187 *
188 * Also note that the function uses a somewhat relaxed locking scheme, so it may
189 * be called only for an already offlined CPU.
190 */
191void clear_tasks_mm_cpumask(int cpu)
192{
193    struct task_struct *p;
194
195    /*
196     * This function is called after the cpu is taken down and marked
197     * offline, so its not like new tasks will ever get this cpu set in
198     * their mm mask. -- Peter Zijlstra
199     * Thus, we may use rcu_read_lock() here, instead of grabbing
200     * full-fledged tasklist_lock.
201     */
202    WARN_ON(cpu_online(cpu));
203    rcu_read_lock();
204    for_each_process(p) {
205        struct task_struct *t;
206
207        /*
208         * Main thread might exit, but other threads may still have
209         * a valid mm. Find one.
210         */
211        t = find_lock_task_mm(p);
212        if (!t)
213            continue;
214        cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
215        task_unlock(t);
216    }
217    rcu_read_unlock();
218}
219
220static inline void check_for_tasks(int cpu)
221{
222    struct task_struct *p;
223
224    write_lock_irq(&tasklist_lock);
225    for_each_process(p) {
226        if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
227            (p->utime || p->stime))
228            printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
229                "(state = %ld, flags = %x)\n",
230                p->comm, task_pid_nr(p), cpu,
231                p->state, p->flags);
232    }
233    write_unlock_irq(&tasklist_lock);
234}
235
/* Arguments handed to take_cpu_down() through __stop_machine(). */
struct take_cpu_down_param {
    unsigned long mod;  /* flags OR-ed into the CPU_DYING event */
    void *hcpu;         /* argument passed to the notifier callbacks */
};
240
241/* Take this CPU down. */
242static int __ref take_cpu_down(void *_param)
243{
244    struct take_cpu_down_param *param = _param;
245    int err;
246
247    /* Ensure this CPU doesn't handle any more interrupts. */
248    err = __cpu_disable();
249    if (err < 0)
250        return err;
251
252    cpu_notify(CPU_DYING | param->mod, param->hcpu);
253    return 0;
254}
255
256/* Requires cpu_add_remove_lock to be held */
257static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
258{
259    int err, nr_calls = 0;
260    void *hcpu = (void *)(long)cpu;
261    unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
262    struct take_cpu_down_param tcd_param = {
263        .mod = mod,
264        .hcpu = hcpu,
265    };
266
267    if (num_online_cpus() == 1)
268        return -EBUSY;
269
270    if (!cpu_online(cpu))
271        return -EINVAL;
272
273    cpu_hotplug_begin();
274
275    err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
276    if (err) {
277        nr_calls--;
278        __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
279        printk("%s: attempt to take down CPU %u failed\n",
280                __func__, cpu);
281        goto out_release;
282    }
283
284    err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
285    if (err) {
286        /* CPU didn't die: tell everyone. Can't complain. */
287        cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
288
289        goto out_release;
290    }
291    BUG_ON(cpu_online(cpu));
292
293    /*
294     * The migration_call() CPU_DYING callback will have removed all
295     * runnable tasks from the cpu, there's only the idle task left now
296     * that the migration thread is done doing the stop_machine thing.
297     *
298     * Wait for the stop thread to go away.
299     */
300    while (!idle_cpu(cpu))
301        cpu_relax();
302
303    /* This actually kills the CPU. */
304    __cpu_die(cpu);
305
306    /* CPU is completely dead: tell everyone. Too late to complain. */
307    cpu_notify_nofail(CPU_DEAD | mod, hcpu);
308
309    check_for_tasks(cpu);
310
311out_release:
312    cpu_hotplug_done();
313    if (!err)
314        cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
315    return err;
316}
317
318int __ref cpu_down(unsigned int cpu)
319{
320    int err;
321
322    cpu_maps_update_begin();
323
324    if (cpu_hotplug_disabled) {
325        err = -EBUSY;
326        goto out;
327    }
328
329    err = _cpu_down(cpu, 0);
330
331out:
332    cpu_maps_update_done();
333    return err;
334}
335EXPORT_SYMBOL(cpu_down);
336#endif /*CONFIG_HOTPLUG_CPU*/
337
338/* Requires cpu_add_remove_lock to be held */
339static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
340{
341    int ret, nr_calls = 0;
342    void *hcpu = (void *)(long)cpu;
343    unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
344    struct task_struct *idle;
345
346    if (cpu_online(cpu) || !cpu_present(cpu))
347        return -EINVAL;
348
349    cpu_hotplug_begin();
350
351    idle = idle_thread_get(cpu);
352    if (IS_ERR(idle)) {
353        ret = PTR_ERR(idle);
354        goto out;
355    }
356
357    ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
358    if (ret) {
359        nr_calls--;
360        printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
361                __func__, cpu);
362        goto out_notify;
363    }
364
365    /* Arch-specific enabling code. */
366    ret = __cpu_up(cpu, idle);
367    if (ret != 0)
368        goto out_notify;
369    BUG_ON(!cpu_online(cpu));
370
371    /* Now call notifier in preparation. */
372    cpu_notify(CPU_ONLINE | mod, hcpu);
373
374out_notify:
375    if (ret != 0)
376        __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
377out:
378    cpu_hotplug_done();
379
380    return ret;
381}
382
383int __cpuinit cpu_up(unsigned int cpu)
384{
385    int err = 0;
386
387#ifdef CONFIG_MEMORY_HOTPLUG
388    int nid;
389    pg_data_t *pgdat;
390#endif
391
392    if (!cpu_possible(cpu)) {
393        printk(KERN_ERR "can't online cpu %d because it is not "
394            "configured as may-hotadd at boot time\n", cpu);
395#if defined(CONFIG_IA64)
396        printk(KERN_ERR "please check additional_cpus= boot "
397                "parameter\n");
398#endif
399        return -EINVAL;
400    }
401
402#ifdef CONFIG_MEMORY_HOTPLUG
403    nid = cpu_to_node(cpu);
404    if (!node_online(nid)) {
405        err = mem_online_node(nid);
406        if (err)
407            return err;
408    }
409
410    pgdat = NODE_DATA(nid);
411    if (!pgdat) {
412        printk(KERN_ERR
413            "Can't online cpu %d due to NULL pgdat\n", cpu);
414        return -ENOMEM;
415    }
416
417    if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
418        mutex_lock(&zonelists_mutex);
419        build_all_zonelists(NULL);
420        mutex_unlock(&zonelists_mutex);
421    }
422#endif
423
424    cpu_maps_update_begin();
425
426    if (cpu_hotplug_disabled) {
427        err = -EBUSY;
428        goto out;
429    }
430
431    err = _cpu_up(cpu, 0);
432
433out:
434    cpu_maps_update_done();
435    return err;
436}
437EXPORT_SYMBOL_GPL(cpu_up);
438
439#ifdef CONFIG_PM_SLEEP_SMP
/*
 * CPUs taken down by disable_nonboot_cpus(); enable_nonboot_cpus() brings
 * them back up and then clears the mask.
 */
static cpumask_var_t frozen_cpus;
441
/*
 * Called by disable_nonboot_cpus() before it starts taking CPUs down.
 * This __weak default does nothing.
 */
void __weak arch_disable_nonboot_cpus_begin(void)
{
}
445
/*
 * Called by disable_nonboot_cpus() after its take-down loop has finished.
 * This __weak default does nothing.
 */
void __weak arch_disable_nonboot_cpus_end(void)
{
}
449
450int disable_nonboot_cpus(void)
451{
452    int cpu, first_cpu, error = 0;
453
454    cpu_maps_update_begin();
455    first_cpu = cpumask_first(cpu_online_mask);
456    /*
457     * We take down all of the non-boot CPUs in one shot to avoid races
458     * with the userspace trying to use the CPU hotplug at the same time
459     */
460    cpumask_clear(frozen_cpus);
461    arch_disable_nonboot_cpus_begin();
462
463    printk("Disabling non-boot CPUs ...\n");
464    for_each_online_cpu(cpu) {
465        if (cpu == first_cpu)
466            continue;
467        error = _cpu_down(cpu, 1);
468        if (!error)
469            cpumask_set_cpu(cpu, frozen_cpus);
470        else {
471            printk(KERN_ERR "Error taking CPU%d down: %d\n",
472                cpu, error);
473            break;
474        }
475    }
476
477    arch_disable_nonboot_cpus_end();
478
479    if (!error) {
480        BUG_ON(num_online_cpus() > 1);
481        /* Make sure the CPUs won't be enabled by someone else */
482        cpu_hotplug_disabled = 1;
483    } else {
484        printk(KERN_ERR "Non-boot CPUs are not disabled\n");
485    }
486    cpu_maps_update_done();
487    return error;
488}
489
/*
 * Called by enable_nonboot_cpus() before it starts bringing CPUs back up.
 * This __weak default does nothing.
 */
void __weak arch_enable_nonboot_cpus_begin(void)
{
}
493
/*
 * Called by enable_nonboot_cpus() after its bring-up loop has finished.
 * This __weak default does nothing.
 */
void __weak arch_enable_nonboot_cpus_end(void)
{
}
497
498void __ref enable_nonboot_cpus(void)
499{
500    int cpu, error;
501
502    /* Allow everyone to use the CPU hotplug again */
503    cpu_maps_update_begin();
504    cpu_hotplug_disabled = 0;
505    if (cpumask_empty(frozen_cpus))
506        goto out;
507
508    printk(KERN_INFO "Enabling non-boot CPUs ...\n");
509
510    arch_enable_nonboot_cpus_begin();
511
512    for_each_cpu(cpu, frozen_cpus) {
513        error = _cpu_up(cpu, 1);
514        if (!error) {
515            printk(KERN_INFO "CPU%d is up\n", cpu);
516            continue;
517        }
518        printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
519    }
520
521    arch_enable_nonboot_cpus_end();
522
523    cpumask_clear(frozen_cpus);
524out:
525    cpu_maps_update_done();
526}
527
528static int __init alloc_frozen_cpus(void)
529{
530    if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
531        return -ENOMEM;
532    return 0;
533}
534core_initcall(alloc_frozen_cpus);
535
/*
 * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU
 * hotplug when tasks are about to be frozen. Also, don't allow the freezer
 * to continue until any currently running CPU hotplug operation gets
 * completed.
 *
 * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the
 * 'cpu_add_remove_lock'. This same lock is also taken by the regular
 * CPU hotplug path and released only after it is complete. Thus, we
 * (and hence the freezer) will block here until any currently running CPU
 * hotplug operation gets completed.
 */
void cpu_hotplug_disable_before_freeze(void)
{
    cpu_maps_update_begin();
    cpu_hotplug_disabled = 1;
    cpu_maps_update_done();
}
553
554
/*
 * When tasks have been thawed, re-enable regular CPU hotplug (which had been
 * disabled while beginning to freeze tasks).
 *
 * The 'cpu_hotplug_disabled' flag is cleared under 'cpu_add_remove_lock'.
 */
void cpu_hotplug_enable_after_thaw(void)
{
    cpu_maps_update_begin();
    cpu_hotplug_disabled = 0;
    cpu_maps_update_done();
}
565
566/*
567 * When callbacks for CPU hotplug notifications are being executed, we must
568 * ensure that the state of the system with respect to the tasks being frozen
569 * or not, as reported by the notification, remains unchanged *throughout the
570 * duration* of the execution of the callbacks.
571 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
572 *
573 * This synchronization is implemented by mutually excluding regular CPU
574 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
575 * Hibernate notifications.
576 */
577static int
578cpu_hotplug_pm_callback(struct notifier_block *nb,
579            unsigned long action, void *ptr)
580{
581    switch (action) {
582
583    case PM_SUSPEND_PREPARE:
584    case PM_HIBERNATION_PREPARE:
585        cpu_hotplug_disable_before_freeze();
586        break;
587
588    case PM_POST_SUSPEND:
589    case PM_POST_HIBERNATION:
590        cpu_hotplug_enable_after_thaw();
591        break;
592
593    default:
594        return NOTIFY_DONE;
595    }
596
597    return NOTIFY_OK;
598}
599
600
/*
 * Hook cpu_hotplug_pm_callback() into the PM notifications with priority 0.
 * Registered as a core initcall; always returns 0.
 */
static int __init cpu_hotplug_pm_sync_init(void)
{
    pm_notifier(cpu_hotplug_pm_callback, 0);
    return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);
607
608#endif /* CONFIG_PM_SLEEP_SMP */
609
610/**
611 * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
612 * @cpu: cpu that just started
613 *
614 * This function calls the cpu_chain notifiers with CPU_STARTING.
615 * It must be called by the arch code on the new cpu, before the new cpu
616 * enables interrupts and before the "boot" cpu returns from __cpu_up().
617 */
618void __cpuinit notify_cpu_starting(unsigned int cpu)
619{
620    unsigned long val = CPU_STARTING;
621
622#ifdef CONFIG_PM_SLEEP_SMP
623    if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
624        val = CPU_STARTING_FROZEN;
625#endif /* CONFIG_PM_SLEEP_SMP */
626    cpu_notify(val, (void *)(long)cpu);
627}
628
629#endif /* CONFIG_SMP */
630
/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents the single-bit values 1<<nr for all NR_CPUS bits.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 *
 * Layout: BITS_PER_LONG+1 rows of BITS_TO_LONGS(NR_CPUS) longs each.
 * Row x+1 has (1UL << x) in its first word; everything else is zero.
 */

/* cpu_bit_bitmap[0] is empty - so we can back into it */
#define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
/* Each wider macro expands to two copies of the next narrower one. */
#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)

const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

    MASK_DECLARE_8(0), MASK_DECLARE_8(8),
    MASK_DECLARE_8(16), MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
    /* Bits 32..63 only exist when a long is wider than 32 bits. */
    MASK_DECLARE_8(32), MASK_DECLARE_8(40),
    MASK_DECLARE_8(48), MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
655
/* Constant NR_CPUS-bit bitmap initialized to CPU_BITS_ALL. */
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
658
/*
 * Backing bitmaps for the possible/online/present/active CPU masks.
 *
 * Each bitmap is private to this file.  It is exposed read-only through
 * the exported const cpumask pointer and modified only through the
 * set_cpu_*() and init_cpu_*() helpers below.
 */

/* With CONFIG_INIT_ALL_POSSIBLE the possible mask starts out as CPU_BITS_ALL. */
#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
    = CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);

static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);

static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);

static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);
679
680void set_cpu_possible(unsigned int cpu, bool possible)
681{
682    if (possible)
683        cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
684    else
685        cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
686}
687
688void set_cpu_present(unsigned int cpu, bool present)
689{
690    if (present)
691        cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
692    else
693        cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
694}
695
696void set_cpu_online(unsigned int cpu, bool online)
697{
698    if (online)
699        cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
700    else
701        cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
702}
703
704void set_cpu_active(unsigned int cpu, bool active)
705{
706    if (active)
707        cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
708    else
709        cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
710}
711
712void init_cpu_present(const struct cpumask *src)
713{
714    cpumask_copy(to_cpumask(cpu_present_bits), src);
715}
716
717void init_cpu_possible(const struct cpumask *src)
718{
719    cpumask_copy(to_cpumask(cpu_possible_bits), src);
720}
721
722void init_cpu_online(const struct cpumask *src)
723{
724    cpumask_copy(to_cpumask(cpu_online_bits), src);
725}
726

Archive Download this file



interactive