mm/oom_kill.c

/*
 * linux/mm/oom_kill.c
 *
 * Copyright (C) 1998,2000 Rik van Riel
 * Thanks go out to Claus Fischer for some serious inspiration and
 * for goading me into coding this file...
 * Copyright (C) 2010 Google, Inc.
 * Rewritten by David Rientjes
 *
 * The routines in this file are used to kill a process when
 * we're seriously out of memory. This gets called from __alloc_pages()
 * in mm/page_alloc.c when we really run out of memory.
 *
 * Since we won't call these routines often (on a well-configured
 * machine) this file will double as a 'coding guide' and a signpost
 * for newbie kernel hackers. It features several pointers to major
 * kernel subsystems and hints as to where to find out what things do.
 */

#include <linux/oom.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/swap.h>
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/cpuset.h>
#include <linux/export.h>
#include <linux/notifier.h>
#include <linux/memcontrol.h>
#include <linux/mempolicy.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/freezer.h>
#include <linux/ftrace.h>
#include <linux/ratelimit.h>

#define CREATE_TRACE_POINTS
#include <trace/events/oom.h>

int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;
static DEFINE_SPINLOCK(zone_scan_lock);

/*
 * compare_swap_oom_score_adj() - compare and swap current's oom_score_adj
 * @old_val: old oom_score_adj for compare
 * @new_val: new oom_score_adj for swap
 *
 * Sets the oom_score_adj value for current to @new_val iff its present value is
 * @old_val. Usually used to reinstate a previous value to prevent racing with
 * userspace tuning the value in the interim.
 */
void compare_swap_oom_score_adj(int old_val, int new_val)
{
    struct sighand_struct *sighand = current->sighand;

    spin_lock_irq(&sighand->siglock);
    if (current->signal->oom_score_adj == old_val)
        current->signal->oom_score_adj = new_val;
    trace_oom_score_adj_update(current);
    spin_unlock_irq(&sighand->siglock);
}

/**
 * test_set_oom_score_adj() - set current's oom_score_adj and return old value
 * @new_val: new oom_score_adj value
 *
 * Sets the oom_score_adj value for current to @new_val with proper
 * synchronization and returns the old value. Usually used to temporarily
 * set a value, save the old value in the caller, and then reinstate it later.
 */
int test_set_oom_score_adj(int new_val)
{
    struct sighand_struct *sighand = current->sighand;
    int old_val;

    spin_lock_irq(&sighand->siglock);
    old_val = current->signal->oom_score_adj;
    current->signal->oom_score_adj = new_val;
    trace_oom_score_adj_update(current);
    spin_unlock_irq(&sighand->siglock);

    return old_val;
}
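
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * temporarily make current the preferred OOM victim, then reinstate the old
 * value without clobbering a concurrent userspace update of oom_score_adj:
 *
 *    int old_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
 *    ... perform the allocation-heavy work ...
 *    compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, old_adj);
 */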

#ifdef CONFIG_NUMA
/**
 * has_intersects_mems_allowed() - check task eligibility for kill
 * @tsk: task struct of which task to consider
 * @mask: nodemask passed to page allocator for mempolicy ooms
 *
 * Task eligibility is determined by whether or not a candidate task, @tsk,
 * shares the same mempolicy nodes as current if it is bound by such a policy
 * and whether or not it has the same set of allowed cpuset nodes.
 */
static bool has_intersects_mems_allowed(struct task_struct *tsk,
                    const nodemask_t *mask)
{
    struct task_struct *start = tsk;

    do {
        if (mask) {
            /*
             * If this is a mempolicy constrained oom, tsk's
             * cpuset is irrelevant. Only return true if its
             * mempolicy intersects current, otherwise it may be
             * needlessly killed.
             */
            if (mempolicy_nodemask_intersects(tsk, mask))
                return true;
        } else {
            /*
             * This is not a mempolicy constrained oom, so only
             * check the mems of tsk's cpuset.
             */
            if (cpuset_mems_allowed_intersects(current, tsk))
                return true;
        }
    } while_each_thread(start, tsk);

    return false;
}
#else
static bool has_intersects_mems_allowed(struct task_struct *tsk,
                    const nodemask_t *mask)
{
    return true;
}
#endif /* CONFIG_NUMA */

/*
 * The process p may have detached its own ->mm while exiting or through
 * use_mm(), but one or more of its subthreads may still have a valid
 * pointer. Return p, or any of its subthreads with a valid ->mm, with
 * task_lock() held.
 */
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
    struct task_struct *t = p;

    do {
        task_lock(t);
        if (likely(t->mm))
            return t;
        task_unlock(t);
    } while_each_thread(p, t);

    return NULL;
}

/* return true if the task is not adequate as candidate victim task. */
static bool oom_unkillable_task(struct task_struct *p,
        const struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
    if (is_global_init(p))
        return true;
    if (p->flags & PF_KTHREAD)
        return true;

    /* For mem_cgroup_out_of_memory(): skip p if it is not a member of the memcg */
    if (memcg && !task_in_mem_cgroup(p, memcg))
        return true;

    /* p may not have freeable memory in nodemask */
    if (!has_intersects_mems_allowed(p, nodemask))
        return true;

    return false;
}

/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task for which to calculate the badness score
 * @memcg: task's memory controller, if constrained
 * @nodemask: nodemask passed to page allocator for mempolicy ooms
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible. The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
              const nodemask_t *nodemask, unsigned long totalpages)
{
    long points;
    long adj;

    if (oom_unkillable_task(p, memcg, nodemask))
        return 0;

    p = find_lock_task_mm(p);
    if (!p)
        return 0;

    adj = p->signal->oom_score_adj;
    if (adj == OOM_SCORE_ADJ_MIN) {
        task_unlock(p);
        return 0;
    }

    /*
     * The baseline for the badness score is the proportion of RAM that each
     * task's rss, pagetable and swap space use.
     */
    points = get_mm_rss(p->mm) + p->mm->nr_ptes +
         get_mm_counter(p->mm, MM_SWAPENTS);
    task_unlock(p);

    /*
     * Root processes get 3% bonus, just like the __vm_enough_memory()
     * implementation used by LSMs.
     */
    if (has_capability_noaudit(p, CAP_SYS_ADMIN))
        adj -= 30;

    /* Normalize to oom_score_adj units */
    adj *= totalpages / 1000;
    points += adj;

    /*
     * Never return 0 for an eligible task regardless of the root bonus and
     * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
     */
    return points > 0 ? points : 1;
}
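
/*
 * Worked example (illustrative numbers only): on a system where
 * totalpages = 1,000,000, a task whose rss, page tables and swap entries
 * add up to 100,000 pages starts at points = 100,000, i.e. roughly 10% of
 * allowed memory. An oom_score_adj of +100 adds 100 * (1,000,000 / 1000) =
 * 100,000 points, doubling its score, while the 3% root bonus (adj -= 30)
 * subtracts 30,000 points. select_bad_process() later reports the winner's
 * score rescaled as chosen_points * 1000 / totalpages.
 */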

/*
 * Determine the type of allocation constraint.
 */
#ifdef CONFIG_NUMA
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
                gfp_t gfp_mask, nodemask_t *nodemask,
                unsigned long *totalpages)
{
    struct zone *zone;
    struct zoneref *z;
    enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    bool cpuset_limited = false;
    int nid;

    /* Default to all available memory */
    *totalpages = totalram_pages + total_swap_pages;

    if (!zonelist)
        return CONSTRAINT_NONE;
    /*
     * Reach here only when __GFP_NOFAIL is used, so avoid killing
     * current; a random task gets killed in this case instead. Ideally
     * this would be CONSTRAINT_THISNODE, but there is no way to handle
     * that yet.
     */
    if (gfp_mask & __GFP_THISNODE)
        return CONSTRAINT_NONE;

    /*
     * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
     * the page allocator means a mempolicy is in effect. Cpuset policy
     * is enforced in get_page_from_freelist().
     */
    if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) {
        *totalpages = total_swap_pages;
        for_each_node_mask(nid, *nodemask)
            *totalpages += node_spanned_pages(nid);
        return CONSTRAINT_MEMORY_POLICY;
    }

    /* Check whether this allocation failure was caused by a cpuset's wall function */
    for_each_zone_zonelist_nodemask(zone, z, zonelist,
            high_zoneidx, nodemask)
        if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
            cpuset_limited = true;

    if (cpuset_limited) {
        *totalpages = total_swap_pages;
        for_each_node_mask(nid, cpuset_current_mems_allowed)
            *totalpages += node_spanned_pages(nid);
        return CONSTRAINT_CPUSET;
    }
    return CONSTRAINT_NONE;
}
#else
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
                gfp_t gfp_mask, nodemask_t *nodemask,
                unsigned long *totalpages)
{
    *totalpages = totalram_pages + total_swap_pages;
    return CONSTRAINT_NONE;
}
#endif
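
/*
 * Note: the *totalpages value computed above is what oom_badness() uses to
 * normalize oom_score_adj, so a constrained OOM (mempolicy or cpuset) scores
 * tasks against only the memory they are actually allowed to use, not all of
 * system RAM plus swap.
 */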

enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
        unsigned long totalpages, const nodemask_t *nodemask,
        bool force_kill)
{
    if (task->exit_state)
        return OOM_SCAN_CONTINUE;
    if (oom_unkillable_task(task, NULL, nodemask))
        return OOM_SCAN_CONTINUE;

    /*
     * This task already has access to memory reserves and is being killed.
     * Don't allow any other task to have access to the reserves.
     */
    if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
        if (unlikely(frozen(task)))
            __thaw_task(task);
        if (!force_kill)
            return OOM_SCAN_ABORT;
    }
    if (!task->mm)
        return OOM_SCAN_CONTINUE;

    if (task->flags & PF_EXITING) {
        /*
         * If task is current and is in the process of releasing memory,
         * allow the "kill" to set TIF_MEMDIE, which will allow it to
         * access memory reserves. Otherwise, it may stall forever.
         *
         * The iteration isn't broken here, however, in case other
         * threads are found to have already been oom killed.
         */
        if (task == current)
            return OOM_SCAN_SELECT;
        else if (!force_kill) {
            /*
             * If this task is not being ptraced on exit, then wait
             * for it to finish before killing some other task
             * unnecessarily.
             */
            if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
                return OOM_SCAN_ABORT;
        }
    }
    return OOM_SCAN_OK;
}
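
/*
 * The return values above drive select_bad_process() below: OOM_SCAN_OK means
 * score the task with oom_badness(), OOM_SCAN_CONTINUE means skip it,
 * OOM_SCAN_SELECT means choose it unconditionally, and OOM_SCAN_ABORT means
 * give up entirely because a kill is already in progress.
 */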

/*
 * Simple selection loop. We choose the process with the highest
 * number of 'points'.
 *
 * (not docbooked, we don't want this one cluttering up the manual)
 */
static struct task_struct *select_bad_process(unsigned int *ppoints,
        unsigned long totalpages, const nodemask_t *nodemask,
        bool force_kill)
{
    struct task_struct *g, *p;
    struct task_struct *chosen = NULL;
    unsigned long chosen_points = 0;

    rcu_read_lock();
    do_each_thread(g, p) {
        unsigned int points;

        switch (oom_scan_process_thread(p, totalpages, nodemask,
                        force_kill)) {
        case OOM_SCAN_SELECT:
            chosen = p;
            chosen_points = ULONG_MAX;
            /* fall through */
        case OOM_SCAN_CONTINUE:
            continue;
        case OOM_SCAN_ABORT:
            rcu_read_unlock();
            return ERR_PTR(-1UL);
        case OOM_SCAN_OK:
            break;
        }
        points = oom_badness(p, NULL, nodemask, totalpages);
        if (points > chosen_points) {
            chosen = p;
            chosen_points = points;
        }
    } while_each_thread(g, p);
    if (chosen)
        get_task_struct(chosen);
    rcu_read_unlock();

    *ppoints = chosen_points * 1000 / totalpages;
    return chosen;
}

/**
 * dump_tasks - dump current memory state of all system tasks
 * @memcg: current's memory controller, if constrained
 * @nodemask: nodemask passed to page allocator for mempolicy ooms
 *
 * Dumps the current memory state of all eligible tasks. Tasks not in the same
 * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
 * are not shown.
 * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes,
 * swapents, oom_score_adj value, and name.
 */
static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
    struct task_struct *p;
    struct task_struct *task;

    pr_info("[ pid ] uid tgid total_vm rss nr_ptes swapents oom_score_adj name\n");
    rcu_read_lock();
    for_each_process(p) {
        if (oom_unkillable_task(p, memcg, nodemask))
            continue;

        task = find_lock_task_mm(p);
        if (!task) {
            /*
             * This is a kthread or all of p's threads have already
             * detached their mm's. There's no need to report
             * them; they can't be oom killed anyway.
             */
            continue;
        }

        pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu %5d %s\n",
            task->pid, from_kuid(&init_user_ns, task_uid(task)),
            task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
            task->mm->nr_ptes,
            get_mm_counter(task->mm, MM_SWAPENTS),
            task->signal->oom_score_adj, task->comm);
        task_unlock(task);
    }
    rcu_read_unlock();
}
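
/*
 * The pr_info() above produces one line per eligible task, e.g.
 * (illustrative, made-up values):
 *
 * [ 1234]  1000  1234   250000    60000     180     1200     0 firefox
 */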

static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
            struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
    task_lock(current);
    pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
        "oom_adj=%d, oom_score_adj=%d\n",
        current->comm, gfp_mask, order, current->signal->oom_adj,
        current->signal->oom_score_adj);
    cpuset_print_task_mems_allowed(current);
    task_unlock(current);
    dump_stack();
    mem_cgroup_print_oom_info(memcg, p);
    show_mem(SHOW_MEM_FILTER_NODES);
    if (sysctl_oom_dump_tasks)
        dump_tasks(memcg, nodemask);
}

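/* K(x): convert a count of pages into kilobytes (each page is 2^PAGE_SHIFT bytes). */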
#define K(x) ((x) << (PAGE_SHIFT-10))
/*
 * Must be called while holding a reference to p, which will be released upon
 * returning.
 */
void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
              unsigned int points, unsigned long totalpages,
              struct mem_cgroup *memcg, nodemask_t *nodemask,
              const char *message)
{
    struct task_struct *victim = p;
    struct task_struct *child;
    struct task_struct *t = p;
    struct mm_struct *mm;
    unsigned int victim_points = 0;
    static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
                          DEFAULT_RATELIMIT_BURST);

    /*
     * If the task is already exiting, don't alarm the sysadmin or kill
     * its children or threads, just set TIF_MEMDIE so it can die quickly.
     */
    if (p->flags & PF_EXITING) {
        set_tsk_thread_flag(p, TIF_MEMDIE);
        put_task_struct(p);
        return;
    }

    if (__ratelimit(&oom_rs))
        dump_header(p, gfp_mask, order, memcg, nodemask);

    task_lock(p);
    pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n",
        message, task_pid_nr(p), p->comm, points);
    task_unlock(p);

    /*
     * If any of p's children has a different mm and is eligible for kill,
     * the one with the highest oom_badness() score is sacrificed for its
     * parent. This attempts to lose the minimal amount of work done while
     * still freeing memory.
     */
    read_lock(&tasklist_lock);
    do {
        list_for_each_entry(child, &t->children, sibling) {
            unsigned int child_points;

            if (child->mm == p->mm)
                continue;
            /*
             * oom_badness() returns 0 if the thread is unkillable
             */
            child_points = oom_badness(child, memcg, nodemask,
                                totalpages);
            if (child_points > victim_points) {
                put_task_struct(victim);
                victim = child;
                victim_points = child_points;
                get_task_struct(victim);
            }
        }
    } while_each_thread(p, t);
    read_unlock(&tasklist_lock);

    rcu_read_lock();
    p = find_lock_task_mm(victim);
    if (!p) {
        rcu_read_unlock();
        put_task_struct(victim);
        return;
    } else if (victim != p) {
        get_task_struct(p);
        put_task_struct(victim);
        victim = p;
    }

    /* mm cannot safely be dereferenced after task_unlock(victim) */
    mm = victim->mm;
    pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
        task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
        K(get_mm_counter(victim->mm, MM_ANONPAGES)),
        K(get_mm_counter(victim->mm, MM_FILEPAGES)));
    task_unlock(victim);

    /*
     * Kill all user processes sharing victim->mm in other thread groups, if
     * any. They don't get access to memory reserves, though, to avoid
     * depletion of all memory. This prevents mm->mmap_sem livelock when an
     * oom killed thread cannot exit because it requires the semaphore and
     * it's contended by another thread trying to allocate memory itself.
     * That thread will now get access to memory reserves since it has a
     * pending fatal signal.
     */
    for_each_process(p)
        if (p->mm == mm && !same_thread_group(p, victim) &&
            !(p->flags & PF_KTHREAD)) {
            if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
                continue;

            task_lock(p); /* Protect ->comm from prctl() */
            pr_err("Kill process %d (%s) sharing same memory\n",
                task_pid_nr(p), p->comm);
            task_unlock(p);
            do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
        }
    rcu_read_unlock();

    set_tsk_thread_flag(victim, TIF_MEMDIE);
    do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
    put_task_struct(victim);
}
#undef K

/*
 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
 */
void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
            int order, const nodemask_t *nodemask)
{
    if (likely(!sysctl_panic_on_oom))
        return;
    if (sysctl_panic_on_oom != 2) {
        /*
         * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel
         * does not panic for cpuset, mempolicy, or memcg allocation
         * failures.
         */
        if (constraint != CONSTRAINT_NONE)
            return;
    }
    dump_header(NULL, gfp_mask, order, NULL, nodemask);
    panic("Out of memory: %s panic_on_oom is enabled\n",
        sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}

static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

int register_oom_notifier(struct notifier_block *nb)
{
    return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

int unregister_oom_notifier(struct notifier_block *nb)
{
    return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);

/*
 * Try to acquire the OOM killer lock for the zones in zonelist. Returns zero
 * if a parallel OOM killing is already taking place that includes a zone in
 * the zonelist. Otherwise, locks all zones in the zonelist and returns 1.
 */
int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
    struct zoneref *z;
    struct zone *zone;
    int ret = 1;

    spin_lock(&zone_scan_lock);
    for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
        if (zone_is_oom_locked(zone)) {
            ret = 0;
            goto out;
        }
    }

    for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
        /*
         * Lock each zone in the zonelist under zone_scan_lock so a
         * parallel invocation of try_set_zonelist_oom() doesn't succeed
         * when it shouldn't.
         */
        zone_set_flag(zone, ZONE_OOM_LOCKED);
    }

out:
    spin_unlock(&zone_scan_lock);
    return ret;
}

/*
 * Clears the ZONE_OOM_LOCKED flag for all zones in the zonelist so that failed
 * allocation attempts with zonelists containing them may now recall the OOM
 * killer, if necessary.
 */
void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
    struct zoneref *z;
    struct zone *zone;

    spin_lock(&zone_scan_lock);
    for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
        zone_clear_flag(zone, ZONE_OOM_LOCKED);
    }
    spin_unlock(&zone_scan_lock);
}

/*
 * Try to acquire the oom killer lock for all system zones. Returns zero if a
 * parallel oom killing is taking place, otherwise locks all zones and returns
 * non-zero.
 */
static int try_set_system_oom(void)
{
    struct zone *zone;
    int ret = 1;

    spin_lock(&zone_scan_lock);
    for_each_populated_zone(zone)
        if (zone_is_oom_locked(zone)) {
            ret = 0;
            goto out;
        }
    for_each_populated_zone(zone)
        zone_set_flag(zone, ZONE_OOM_LOCKED);
out:
    spin_unlock(&zone_scan_lock);
    return ret;
}

/*
 * Clears ZONE_OOM_LOCKED for all system zones so that failed allocation
 * attempts or page faults may now recall the oom killer, if necessary.
 */
static void clear_system_oom(void)
{
    struct zone *zone;

    spin_lock(&zone_scan_lock);
    for_each_populated_zone(zone)
        zone_clear_flag(zone, ZONE_OOM_LOCKED);
    spin_unlock(&zone_scan_lock);
}

/**
 * out_of_memory - kill the "best" process when we run out of memory
 * @zonelist: zonelist pointer
 * @gfp_mask: memory allocation flags
 * @order: amount of memory being requested as a power of 2
 * @nodemask: nodemask passed to page allocator
 * @force_kill: true if a task must be killed, even if others are exiting
 *
 * If we run out of memory, we have the choice between either
 * killing a random task (bad), letting the system crash (worse)
 * OR try to be smart about which process to kill. Note that we
 * don't have to be perfect here, we just have to be good.
 */
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
        int order, nodemask_t *nodemask, bool force_kill)
{
    const nodemask_t *mpol_mask;
    struct task_struct *p;
    unsigned long totalpages;
    unsigned long freed = 0;
    unsigned int uninitialized_var(points);
    enum oom_constraint constraint = CONSTRAINT_NONE;
    int killed = 0;

    blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
    if (freed > 0)
        /* Got some memory back in the last second. */
        return;

    /*
     * If current has a pending SIGKILL, then automatically select it. The
     * goal is to allow it to allocate so that it may quickly exit and free
     * its memory.
     */
    if (fatal_signal_pending(current)) {
        set_thread_flag(TIF_MEMDIE);
        return;
    }

    /*
     * Check if there were limitations on the allocation (only relevant for
     * NUMA) that may require different handling.
     */
    constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
                        &totalpages);
    mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
    check_panic_on_oom(constraint, gfp_mask, order, mpol_mask);

    if (sysctl_oom_kill_allocating_task && current->mm &&
        !oom_unkillable_task(current, NULL, nodemask) &&
        current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
        get_task_struct(current);
        oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
                 nodemask,
                 "Out of memory (oom_kill_allocating_task)");
        goto out;
    }

    p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
    /* Found nothing?!?! Either we hang forever, or we panic. */
    if (!p) {
        dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
        panic("Out of memory and no killable processes...\n");
    }
    if (PTR_ERR(p) != -1UL) {
        oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
                 nodemask, "Out of memory");
        killed = 1;
    }
out:
    /*
     * Give the killed threads a good chance of exiting before trying to
     * allocate memory again.
     */
    if (killed)
        schedule_timeout_killable(1);
}
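
/*
 * Sketch of the expected call path (an assumption, based on the header
 * comment at the top of this file): the page allocator's slowpath in
 * mm/page_alloc.c invokes try_set_zonelist_oom(), then out_of_memory(),
 * then clear_zonelist_oom() once direct reclaim has failed to make
 * progress, so only one OOM kill proceeds per overlapping zonelist.
 */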

/*
 * The pagefault handler calls here because it is out of memory, so kill a
 * memory-hogging task. If a populated zone has ZONE_OOM_LOCKED set, a parallel
 * oom killing is already in progress so do nothing. If a task is found with
 * TIF_MEMDIE set, it has been killed so do nothing and allow it to exit.
 */
void pagefault_out_of_memory(void)
{
    if (try_set_system_oom()) {
        out_of_memory(NULL, 0, 0, NULL, false);
        clear_system_oom();
    }
    schedule_timeout_killable(1);
}
