Root/mm/vmstat.c

1/*
2 * linux/mm/vmstat.c
3 *
4 * Manages VM statistics
5 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
6 *
7 * zoned VM statistics
8 * Copyright (C) 2006 Silicon Graphics, Inc.,
9 * Christoph Lameter <christoph@lameter.com>
10 */
11#include <linux/fs.h>
12#include <linux/mm.h>
13#include <linux/err.h>
14#include <linux/module.h>
15#include <linux/slab.h>
16#include <linux/cpu.h>
17#include <linux/vmstat.h>
18#include <linux/sched.h>
19#include <linux/math64.h>
20
21#ifdef CONFIG_VM_EVENT_COUNTERS
22DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
23EXPORT_PER_CPU_SYMBOL(vm_event_states);
24
25static void sum_vm_events(unsigned long *ret, const struct cpumask *cpumask)
26{
27    int cpu;
28    int i;
29
30    memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
31
32    for_each_cpu(cpu, cpumask) {
33        struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
34
35        for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
36            ret[i] += this->event[i];
37    }
38}
39
40/*
41 * Accumulate the vm event counters across all CPUs.
42 * The result is unavoidably approximate - it can change
43 * during and after execution of this function.
44*/
45void all_vm_events(unsigned long *ret)
46{
47    get_online_cpus();
48    sum_vm_events(ret, cpu_online_mask);
49    put_online_cpus();
50}
51EXPORT_SYMBOL_GPL(all_vm_events);
52
53#ifdef CONFIG_HOTPLUG
54/*
55 * Fold the foreign cpu events into our own.
56 *
57 * This is adding to the events on one processor
58 * but keeps the global counts constant.
59 */
60void vm_events_fold_cpu(int cpu)
61{
62    struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
63    int i;
64
65    for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
66        count_vm_events(i, fold_state->event[i]);
67        fold_state->event[i] = 0;
68    }
69}
70#endif /* CONFIG_HOTPLUG */
71
72#endif /* CONFIG_VM_EVENT_COUNTERS */
73
74/*
75 * Manage combined zone based / global counters
76 *
77 * vm_stat contains the global counters
78 */
79atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
80EXPORT_SYMBOL(vm_stat);
81
82#ifdef CONFIG_SMP
83
84static int calculate_threshold(struct zone *zone)
85{
86    int threshold;
87    int mem; /* memory in 128 MB units */
88
89    /*
90     * The threshold scales with the number of processors and the amount
91     * of memory per zone. More memory means that we can defer updates for
92     * longer, more processors could lead to more contention.
93      * fls() is used to have a cheap way of logarithmic scaling.
94     *
95     * Some sample thresholds:
96     *
97     * Threshold Processors (fls) Zonesize fls(mem+1)
98     * ------------------------------------------------------------------
99     * 8 1 1 0.9-1 GB 4
100     * 16 2 2 0.9-1 GB 4
101     * 20 2 2 1-2 GB 5
102     * 24 2 2 2-4 GB 6
103     * 28 2 2 4-8 GB 7
104     * 32 2 2 8-16 GB 8
105     * 4 2 2 <128M 1
106     * 30 4 3 2-4 GB 5
107     * 48 4 3 8-16 GB 8
108     * 32 8 4 1-2 GB 4
109     * 32 8 4 0.9-1GB 4
110     * 10 16 5 <128M 1
111     * 40 16 5 900M 4
112     * 70 64 7 2-4 GB 5
113     * 84 64 7 4-8 GB 6
114     * 108 512 9 4-8 GB 6
115     * 125 1024 10 8-16 GB 8
116     * 125 1024 10 16-32 GB 9
117     */
118
119    mem = zone->present_pages >> (27 - PAGE_SHIFT);
120
121    threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
122
123    /*
124     * Maximum threshold is 125
125     */
126    threshold = min(125, threshold);
127
128    return threshold;
129}
130
131/*
132 * Refresh the thresholds for each zone.
133 */
134static void refresh_zone_stat_thresholds(void)
135{
136    struct zone *zone;
137    int cpu;
138    int threshold;
139
140    for_each_populated_zone(zone) {
141        threshold = calculate_threshold(zone);
142
143        for_each_online_cpu(cpu)
144            per_cpu_ptr(zone->pageset, cpu)->stat_threshold
145                            = threshold;
146    }
147}
148
149/*
150 * For use when we know that interrupts are disabled.
151 */
152void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
153                int delta)
154{
155    struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
156
157    s8 *p = pcp->vm_stat_diff + item;
158    long x;
159
160    x = delta + *p;
161
162    if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
163        zone_page_state_add(x, zone, item);
164        x = 0;
165    }
166    *p = x;
167}
168EXPORT_SYMBOL(__mod_zone_page_state);
169
170/*
171 * For an unknown interrupt state
172 */
173void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
174                    int delta)
175{
176    unsigned long flags;
177
178    local_irq_save(flags);
179    __mod_zone_page_state(zone, item, delta);
180    local_irq_restore(flags);
181}
182EXPORT_SYMBOL(mod_zone_page_state);
183
184/*
185 * Optimized increment and decrement functions.
186 *
187 * These are only for a single page and therefore can take a struct page *
188 * argument instead of struct zone *. This allows the inclusion of the code
189 * generated for page_zone(page) into the optimized functions.
190 *
191 * No overflow check is necessary and therefore the differential can be
192 * incremented or decremented in place which may allow the compilers to
193 * generate better code.
194 * The increment or decrement is known and therefore one boundary check can
195 * be omitted.
196 *
197 * NOTE: These functions are very performance sensitive. Change only
198 * with care.
199 *
200 * Some processors have inc/dec instructions that are atomic vs an interrupt.
201 * However, the code must first determine the differential location in a zone
202 * based on the processor number and then inc/dec the counter. There is no
203 * guarantee without disabling preemption that the processor will not change
204 * in between and therefore the atomicity vs. interrupt cannot be exploited
205 * in a useful way here.
206 */
207void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
208{
209    struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
210    s8 *p = pcp->vm_stat_diff + item;
211
212    (*p)++;
213
214    if (unlikely(*p > pcp->stat_threshold)) {
215        int overstep = pcp->stat_threshold / 2;
216
217        zone_page_state_add(*p + overstep, zone, item);
218        *p = -overstep;
219    }
220}
221
222void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
223{
224    __inc_zone_state(page_zone(page), item);
225}
226EXPORT_SYMBOL(__inc_zone_page_state);
227
228void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
229{
230    struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
231    s8 *p = pcp->vm_stat_diff + item;
232
233    (*p)--;
234
235    if (unlikely(*p < - pcp->stat_threshold)) {
236        int overstep = pcp->stat_threshold / 2;
237
238        zone_page_state_add(*p - overstep, zone, item);
239        *p = overstep;
240    }
241}
242
243void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
244{
245    __dec_zone_state(page_zone(page), item);
246}
247EXPORT_SYMBOL(__dec_zone_page_state);
248
249void inc_zone_state(struct zone *zone, enum zone_stat_item item)
250{
251    unsigned long flags;
252
253    local_irq_save(flags);
254    __inc_zone_state(zone, item);
255    local_irq_restore(flags);
256}
257
258void inc_zone_page_state(struct page *page, enum zone_stat_item item)
259{
260    unsigned long flags;
261    struct zone *zone;
262
263    zone = page_zone(page);
264    local_irq_save(flags);
265    __inc_zone_state(zone, item);
266    local_irq_restore(flags);
267}
268EXPORT_SYMBOL(inc_zone_page_state);
269
270void dec_zone_page_state(struct page *page, enum zone_stat_item item)
271{
272    unsigned long flags;
273
274    local_irq_save(flags);
275    __dec_zone_page_state(page, item);
276    local_irq_restore(flags);
277}
278EXPORT_SYMBOL(dec_zone_page_state);
279
280/*
281 * Update the zone counters for one cpu.
282 *
283 * The cpu specified must be either the current cpu or a processor that
284 * is not online. If it is the current cpu then the execution thread must
285 * be pinned to the current cpu.
286 *
287 * Note that refresh_cpu_vm_stats strives to only access
288 * node local memory. The per cpu pagesets on remote zones are placed
289 * in the memory local to the processor using that pageset. So the
290 * loop over all zones will access a series of cachelines local to
291 * the processor.
292 *
293 * The call to zone_page_state_add updates the cachelines with the
294 * statistics in the remote zone struct as well as the global cachelines
295 * with the global counters. These could cause remote node cache line
296 * bouncing and will have to be only done when necessary.
297 */
298void refresh_cpu_vm_stats(int cpu)
299{
300    struct zone *zone;
301    int i;
302    int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
303
304    for_each_populated_zone(zone) {
305        struct per_cpu_pageset *p;
306
307        p = per_cpu_ptr(zone->pageset, cpu);
308
309        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
310            if (p->vm_stat_diff[i]) {
311                unsigned long flags;
312                int v;
313
314                local_irq_save(flags);
315                v = p->vm_stat_diff[i];
316                p->vm_stat_diff[i] = 0;
317                local_irq_restore(flags);
318                atomic_long_add(v, &zone->vm_stat[i]);
319                global_diff[i] += v;
320#ifdef CONFIG_NUMA
321                /* 3 seconds idle till flush */
322                p->expire = 3;
323#endif
324            }
325        cond_resched();
326#ifdef CONFIG_NUMA
327        /*
328         * Deal with draining the remote pageset of this
329         * processor
330         *
331         * Check if there are pages remaining in this pageset
332         * if not then there is nothing to expire.
333         */
334        if (!p->expire || !p->pcp.count)
335            continue;
336
337        /*
338         * We never drain zones local to this processor.
339         */
340        if (zone_to_nid(zone) == numa_node_id()) {
341            p->expire = 0;
342            continue;
343        }
344
345        p->expire--;
346        if (p->expire)
347            continue;
348
349        if (p->pcp.count)
350            drain_zone_pages(zone, &p->pcp);
351#endif
352    }
353
354    for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
355        if (global_diff[i])
356            atomic_long_add(global_diff[i], &vm_stat[i]);
357}
358
359#endif
360
361#ifdef CONFIG_NUMA
362/*
363 * zonelist = the list of zones passed to the allocator
364 * z = the zone from which the allocation occurred.
365 *
366 * Must be called with interrupts disabled.
367 */
368void zone_statistics(struct zone *preferred_zone, struct zone *z)
369{
370    if (z->zone_pgdat == preferred_zone->zone_pgdat) {
371        __inc_zone_state(z, NUMA_HIT);
372    } else {
373        __inc_zone_state(z, NUMA_MISS);
374        __inc_zone_state(preferred_zone, NUMA_FOREIGN);
375    }
376    if (z->node == numa_node_id())
377        __inc_zone_state(z, NUMA_LOCAL);
378    else
379        __inc_zone_state(z, NUMA_OTHER);
380}
381#endif
382
383#ifdef CONFIG_COMPACTION
/* Snapshot of a zone's free lists, filled by fill_contig_page_info(). */
struct contig_page_info {
    unsigned long free_pages;           /* free base pages, all orders */
    unsigned long free_blocks_total;    /* free blocks, all orders */
    unsigned long free_blocks_suitable; /* blocks of the target order obtainable from free blocks of that order or larger */
};
389
390/*
391 * Calculate the number of free pages in a zone, how many contiguous
392 * pages are free and how many are large enough to satisfy an allocation of
393 * the target size. Note that this function makes no attempt to estimate
394 * how many suitable free blocks there *might* be if MOVABLE pages were
395 * migrated. Calculating that is possible, but expensive and can be
396 * figured out from userspace
397 */
398static void fill_contig_page_info(struct zone *zone,
399                unsigned int suitable_order,
400                struct contig_page_info *info)
401{
402    unsigned int order;
403
404    info->free_pages = 0;
405    info->free_blocks_total = 0;
406    info->free_blocks_suitable = 0;
407
408    for (order = 0; order < MAX_ORDER; order++) {
409        unsigned long blocks;
410
411        /* Count number of free blocks */
412        blocks = zone->free_area[order].nr_free;
413        info->free_blocks_total += blocks;
414
415        /* Count free base pages */
416        info->free_pages += blocks << order;
417
418        /* Count the suitable free blocks */
419        if (order >= suitable_order)
420            info->free_blocks_suitable += blocks <<
421                        (order - suitable_order);
422    }
423}
424
425/*
426 * A fragmentation index only makes sense if an allocation of a requested
427 * size would fail. If that is true, the fragmentation index indicates
428 * whether external fragmentation or a lack of memory was the problem.
429 * The value can be used to determine if page reclaim or compaction
430 * should be used
431 */
432static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
433{
434    unsigned long requested = 1UL << order;
435
436    if (!info->free_blocks_total)
437        return 0;
438
439    /* Fragmentation index only makes sense when a request would fail */
440    if (info->free_blocks_suitable)
441        return -1000;
442
443    /*
444     * Index is between 0 and 1 so return within 3 decimal places
445     *
446     * 0 => allocation would fail due to lack of memory
447     * 1 => allocation would fail due to fragmentation
448     */
449    return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
450}
451
452/* Same as __fragmentation index but allocs contig_page_info on stack */
453int fragmentation_index(struct zone *zone, unsigned int order)
454{
455    struct contig_page_info info;
456
457    fill_contig_page_info(zone, order, &info);
458    return __fragmentation_index(order, &info);
459}
460#endif
461
462#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
463#include <linux/proc_fs.h>
464#include <linux/seq_file.h>
465
466static char * const migratetype_names[MIGRATE_TYPES] = {
467    "Unmovable",
468    "Reclaimable",
469    "Movable",
470    "Reserve",
471    "Isolate",
472};
473
474static void *frag_start(struct seq_file *m, loff_t *pos)
475{
476    pg_data_t *pgdat;
477    loff_t node = *pos;
478    for (pgdat = first_online_pgdat();
479         pgdat && node;
480         pgdat = next_online_pgdat(pgdat))
481        --node;
482
483    return pgdat;
484}
485
486static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
487{
488    pg_data_t *pgdat = (pg_data_t *)arg;
489
490    (*pos)++;
491    return next_online_pgdat(pgdat);
492}
493
/* seq_file stop: the node walk holds no resources, nothing to release. */
static void frag_stop(struct seq_file *m, void *arg)
{
}
497
498/* Walk all the zones in a node and print using a callback */
499static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
500        void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
501{
502    struct zone *zone;
503    struct zone *node_zones = pgdat->node_zones;
504    unsigned long flags;
505
506    for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
507        if (!populated_zone(zone))
508            continue;
509
510        spin_lock_irqsave(&zone->lock, flags);
511        print(m, pgdat, zone);
512        spin_unlock_irqrestore(&zone->lock, flags);
513    }
514}
515#endif
516
517#ifdef CONFIG_PROC_FS
518static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
519                        struct zone *zone)
520{
521    int order;
522
523    seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
524    for (order = 0; order < MAX_ORDER; ++order)
525        seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
526    seq_putc(m, '\n');
527}
528
529/*
530 * This walks the free areas for each zone.
531 */
532static int frag_show(struct seq_file *m, void *arg)
533{
534    pg_data_t *pgdat = (pg_data_t *)arg;
535    walk_zones_in_node(m, pgdat, frag_show_print);
536    return 0;
537}
538
539static void pagetypeinfo_showfree_print(struct seq_file *m,
540                    pg_data_t *pgdat, struct zone *zone)
541{
542    int order, mtype;
543
544    for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
545        seq_printf(m, "Node %4d, zone %8s, type %12s ",
546                    pgdat->node_id,
547                    zone->name,
548                    migratetype_names[mtype]);
549        for (order = 0; order < MAX_ORDER; ++order) {
550            unsigned long freecount = 0;
551            struct free_area *area;
552            struct list_head *curr;
553
554            area = &(zone->free_area[order]);
555
556            list_for_each(curr, &area->free_list[mtype])
557                freecount++;
558            seq_printf(m, "%6lu ", freecount);
559        }
560        seq_putc(m, '\n');
561    }
562}
563
564/* Print out the free pages at each order for each migatetype */
565static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
566{
567    int order;
568    pg_data_t *pgdat = (pg_data_t *)arg;
569
570    /* Print header */
571    seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
572    for (order = 0; order < MAX_ORDER; ++order)
573        seq_printf(m, "%6d ", order);
574    seq_putc(m, '\n');
575
576    walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);
577
578    return 0;
579}
580
581static void pagetypeinfo_showblockcount_print(struct seq_file *m,
582                    pg_data_t *pgdat, struct zone *zone)
583{
584    int mtype;
585    unsigned long pfn;
586    unsigned long start_pfn = zone->zone_start_pfn;
587    unsigned long end_pfn = start_pfn + zone->spanned_pages;
588    unsigned long count[MIGRATE_TYPES] = { 0, };
589
590    for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
591        struct page *page;
592
593        if (!pfn_valid(pfn))
594            continue;
595
596        page = pfn_to_page(pfn);
597
598        /* Watch for unexpected holes punched in the memmap */
599        if (!memmap_valid_within(pfn, page, zone))
600            continue;
601
602        mtype = get_pageblock_migratetype(page);
603
604        if (mtype < MIGRATE_TYPES)
605            count[mtype]++;
606    }
607
608    /* Print counts */
609    seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
610    for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
611        seq_printf(m, "%12lu ", count[mtype]);
612    seq_putc(m, '\n');
613}
614
615/* Print out the free pages at each order for each migratetype */
616static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
617{
618    int mtype;
619    pg_data_t *pgdat = (pg_data_t *)arg;
620
621    seq_printf(m, "\n%-23s", "Number of blocks type ");
622    for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
623        seq_printf(m, "%12s ", migratetype_names[mtype]);
624    seq_putc(m, '\n');
625    walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);
626
627    return 0;
628}
629
630/*
631 * This prints out statistics in relation to grouping pages by mobility.
632 * It is expensive to collect so do not constantly read the file.
633 */
634static int pagetypeinfo_show(struct seq_file *m, void *arg)
635{
636    pg_data_t *pgdat = (pg_data_t *)arg;
637
638    /* check memoryless node */
639    if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
640        return 0;
641
642    seq_printf(m, "Page block order: %d\n", pageblock_order);
643    seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
644    seq_putc(m, '\n');
645    pagetypeinfo_showfree(m, pgdat);
646    pagetypeinfo_showblockcount(m, pgdat);
647
648    return 0;
649}
650
651static const struct seq_operations fragmentation_op = {
652    .start = frag_start,
653    .next = frag_next,
654    .stop = frag_stop,
655    .show = frag_show,
656};
657
658static int fragmentation_open(struct inode *inode, struct file *file)
659{
660    return seq_open(file, &fragmentation_op);
661}
662
663static const struct file_operations fragmentation_file_operations = {
664    .open = fragmentation_open,
665    .read = seq_read,
666    .llseek = seq_lseek,
667    .release = seq_release,
668};
669
670static const struct seq_operations pagetypeinfo_op = {
671    .start = frag_start,
672    .next = frag_next,
673    .stop = frag_stop,
674    .show = pagetypeinfo_show,
675};
676
677static int pagetypeinfo_open(struct inode *inode, struct file *file)
678{
679    return seq_open(file, &pagetypeinfo_op);
680}
681
682static const struct file_operations pagetypeinfo_file_ops = {
683    .open = pagetypeinfo_open,
684    .read = seq_read,
685    .llseek = seq_lseek,
686    .release = seq_release,
687};
688
689#ifdef CONFIG_ZONE_DMA
690#define TEXT_FOR_DMA(xx) xx "_dma",
691#else
692#define TEXT_FOR_DMA(xx)
693#endif
694
695#ifdef CONFIG_ZONE_DMA32
696#define TEXT_FOR_DMA32(xx) xx "_dma32",
697#else
698#define TEXT_FOR_DMA32(xx)
699#endif
700
701#ifdef CONFIG_HIGHMEM
702#define TEXT_FOR_HIGHMEM(xx) xx "_high",
703#else
704#define TEXT_FOR_HIGHMEM(xx)
705#endif
706
707#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
708                    TEXT_FOR_HIGHMEM(xx) xx "_movable",
709
/*
 * Names of the counters, in the order they are reported.
 *
 * The first NR_VM_ZONE_STAT_ITEMS entries name the zoned counters; the
 * per-zone output indexes this table directly with the counter number.
 * The vm event names follow; the global output indexes the table with
 * the position of a value in a buffer that holds the zoned counters
 * first and the event counters after them. The order here must
 * therefore match the order of both sets of counters.
 */
static const char * const vmstat_text[] = {
    /* Zoned VM counters */
    "nr_free_pages",
    "nr_inactive_anon",
    "nr_active_anon",
    "nr_inactive_file",
    "nr_active_file",
    "nr_unevictable",
    "nr_mlock",
    "nr_anon_pages",
    "nr_mapped",
    "nr_file_pages",
    "nr_dirty",
    "nr_writeback",
    "nr_slab_reclaimable",
    "nr_slab_unreclaimable",
    "nr_page_table_pages",
    "nr_kernel_stack",
    "nr_unstable",
    "nr_bounce",
    "nr_vmscan_write",
    "nr_writeback_temp",
    "nr_isolated_anon",
    "nr_isolated_file",
    "nr_shmem",
#ifdef CONFIG_NUMA
    "numa_hit",
    "numa_miss",
    "numa_foreign",
    "numa_interleave",
    "numa_local",
    "numa_other",
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
    /* VM event counters */
    "pgpgin",
    "pgpgout",
    "pswpin",
    "pswpout",

    TEXTS_FOR_ZONES("pgalloc")

    "pgfree",
    "pgactivate",
    "pgdeactivate",

    "pgfault",
    "pgmajfault",

    TEXTS_FOR_ZONES("pgrefill")
    TEXTS_FOR_ZONES("pgsteal")
    TEXTS_FOR_ZONES("pgscan_kswapd")
    TEXTS_FOR_ZONES("pgscan_direct")

#ifdef CONFIG_NUMA
    "zone_reclaim_failed",
#endif
    "pginodesteal",
    "slabs_scanned",
    "kswapd_steal",
    "kswapd_inodesteal",
    "kswapd_low_wmark_hit_quickly",
    "kswapd_high_wmark_hit_quickly",
    "kswapd_skip_congestion_wait",
    "pageoutrun",
    "allocstall",

    "pgrotated",

#ifdef CONFIG_COMPACTION
    "compact_blocks_moved",
    "compact_pages_moved",
    "compact_pagemigrate_failed",
    "compact_stall",
    "compact_fail",
    "compact_success",
#endif

#ifdef CONFIG_HUGETLB_PAGE
    "htlb_buddy_alloc_success",
    "htlb_buddy_alloc_fail",
#endif
    "unevictable_pgs_culled",
    "unevictable_pgs_scanned",
    "unevictable_pgs_rescued",
    "unevictable_pgs_mlocked",
    "unevictable_pgs_munlocked",
    "unevictable_pgs_cleared",
    "unevictable_pgs_stranded",
    "unevictable_pgs_mlockfreed",
#endif
};
802
803static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
804                            struct zone *zone)
805{
806    int i;
807    seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
808    seq_printf(m,
809           "\n pages free %lu"
810           "\n min %lu"
811           "\n low %lu"
812           "\n high %lu"
813           "\n scanned %lu"
814           "\n spanned %lu"
815           "\n present %lu",
816           zone_page_state(zone, NR_FREE_PAGES),
817           min_wmark_pages(zone),
818           low_wmark_pages(zone),
819           high_wmark_pages(zone),
820           zone->pages_scanned,
821           zone->spanned_pages,
822           zone->present_pages);
823
824    for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
825        seq_printf(m, "\n %-12s %lu", vmstat_text[i],
826                zone_page_state(zone, i));
827
828    seq_printf(m,
829           "\n protection: (%lu",
830           zone->lowmem_reserve[0]);
831    for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
832        seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
833    seq_printf(m,
834           ")"
835           "\n pagesets");
836    for_each_online_cpu(i) {
837        struct per_cpu_pageset *pageset;
838
839        pageset = per_cpu_ptr(zone->pageset, i);
840        seq_printf(m,
841               "\n cpu: %i"
842               "\n count: %i"
843               "\n high: %i"
844               "\n batch: %i",
845               i,
846               pageset->pcp.count,
847               pageset->pcp.high,
848               pageset->pcp.batch);
849#ifdef CONFIG_SMP
850        seq_printf(m, "\n vm stats threshold: %d",
851                pageset->stat_threshold);
852#endif
853    }
854    seq_printf(m,
855           "\n all_unreclaimable: %u"
856           "\n prev_priority: %i"
857           "\n start_pfn: %lu"
858           "\n inactive_ratio: %u",
859           zone->all_unreclaimable,
860           zone->prev_priority,
861           zone->zone_start_pfn,
862           zone->inactive_ratio);
863    seq_putc(m, '\n');
864}
865
866/*
867 * Output information about zones in @pgdat.
868 */
869static int zoneinfo_show(struct seq_file *m, void *arg)
870{
871    pg_data_t *pgdat = (pg_data_t *)arg;
872    walk_zones_in_node(m, pgdat, zoneinfo_show_print);
873    return 0;
874}
875
876static const struct seq_operations zoneinfo_op = {
877    .start = frag_start, /* iterate over all zones. The same as in
878                   * fragmentation. */
879    .next = frag_next,
880    .stop = frag_stop,
881    .show = zoneinfo_show,
882};
883
884static int zoneinfo_open(struct inode *inode, struct file *file)
885{
886    return seq_open(file, &zoneinfo_op);
887}
888
889static const struct file_operations proc_zoneinfo_file_operations = {
890    .open = zoneinfo_open,
891    .read = seq_read,
892    .llseek = seq_lseek,
893    .release = seq_release,
894};
895
896static void *vmstat_start(struct seq_file *m, loff_t *pos)
897{
898    unsigned long *v;
899#ifdef CONFIG_VM_EVENT_COUNTERS
900    unsigned long *e;
901#endif
902    int i;
903
904    if (*pos >= ARRAY_SIZE(vmstat_text))
905        return NULL;
906
907#ifdef CONFIG_VM_EVENT_COUNTERS
908    v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
909            + sizeof(struct vm_event_state), GFP_KERNEL);
910#else
911    v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
912            GFP_KERNEL);
913#endif
914    m->private = v;
915    if (!v)
916        return ERR_PTR(-ENOMEM);
917    for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
918        v[i] = global_page_state(i);
919#ifdef CONFIG_VM_EVENT_COUNTERS
920    e = v + NR_VM_ZONE_STAT_ITEMS;
921    all_vm_events(e);
922    e[PGPGIN] /= 2; /* sectors -> kbytes */
923    e[PGPGOUT] /= 2;
924#endif
925    return v + *pos;
926}
927
928static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
929{
930    (*pos)++;
931    if (*pos >= ARRAY_SIZE(vmstat_text))
932        return NULL;
933    return (unsigned long *)m->private + *pos;
934}
935
936static int vmstat_show(struct seq_file *m, void *arg)
937{
938    unsigned long *l = arg;
939    unsigned long off = l - (unsigned long *)m->private;
940
941    seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
942    return 0;
943}
944
945static void vmstat_stop(struct seq_file *m, void *arg)
946{
947    kfree(m->private);
948    m->private = NULL;
949}
950
951static const struct seq_operations vmstat_op = {
952    .start = vmstat_start,
953    .next = vmstat_next,
954    .stop = vmstat_stop,
955    .show = vmstat_show,
956};
957
958static int vmstat_open(struct inode *inode, struct file *file)
959{
960    return seq_open(file, &vmstat_op);
961}
962
963static const struct file_operations proc_vmstat_file_operations = {
964    .open = vmstat_open,
965    .read = seq_read,
966    .llseek = seq_lseek,
967    .release = seq_release,
968};
969#endif /* CONFIG_PROC_FS */
970
971#ifdef CONFIG_SMP
972static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
973int sysctl_stat_interval __read_mostly = HZ;
974
975static void vmstat_update(struct work_struct *w)
976{
977    refresh_cpu_vm_stats(smp_processor_id());
978    schedule_delayed_work(&__get_cpu_var(vmstat_work),
979        round_jiffies_relative(sysctl_stat_interval));
980}
981
982static void __cpuinit start_cpu_timer(int cpu)
983{
984    struct delayed_work *work = &per_cpu(vmstat_work, cpu);
985
986    INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update);
987    schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
988}
989
990/*
991 * Use the cpu notifier to insure that the thresholds are recalculated
992 * when necessary.
993 */
994static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
995        unsigned long action,
996        void *hcpu)
997{
998    long cpu = (long)hcpu;
999
1000    switch (action) {
1001    case CPU_ONLINE:
1002    case CPU_ONLINE_FROZEN:
1003        start_cpu_timer(cpu);
1004        node_set_state(cpu_to_node(cpu), N_CPU);
1005        break;
1006    case CPU_DOWN_PREPARE:
1007    case CPU_DOWN_PREPARE_FROZEN:
1008        cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
1009        per_cpu(vmstat_work, cpu).work.func = NULL;
1010        break;
1011    case CPU_DOWN_FAILED:
1012    case CPU_DOWN_FAILED_FROZEN:
1013        start_cpu_timer(cpu);
1014        break;
1015    case CPU_DEAD:
1016    case CPU_DEAD_FROZEN:
1017        refresh_zone_stat_thresholds();
1018        break;
1019    default:
1020        break;
1021    }
1022    return NOTIFY_OK;
1023}
1024
1025static struct notifier_block __cpuinitdata vmstat_notifier =
1026    { &vmstat_cpuup_callback, NULL, 0 };
1027#endif
1028
1029static int __init setup_vmstat(void)
1030{
1031#ifdef CONFIG_SMP
1032    int cpu;
1033
1034    refresh_zone_stat_thresholds();
1035    register_cpu_notifier(&vmstat_notifier);
1036
1037    for_each_online_cpu(cpu)
1038        start_cpu_timer(cpu);
1039#endif
1040#ifdef CONFIG_PROC_FS
1041    proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
1042    proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
1043    proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
1044    proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
1045#endif
1046    return 0;
1047}
1048module_init(setup_vmstat)
1049
1050#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1051#include <linux/debugfs.h>
1052
1053static struct dentry *extfrag_debug_root;
1054
1055/*
1056 * Return an index indicating how much of the available free memory is
1057 * unusable for an allocation of the requested size.
1058 */
1059static int unusable_free_index(unsigned int order,
1060                struct contig_page_info *info)
1061{
1062    /* No free memory is interpreted as all free memory is unusable */
1063    if (info->free_pages == 0)
1064        return 1000;
1065
1066    /*
1067     * Index should be a value between 0 and 1. Return a value to 3
1068     * decimal places.
1069     *
1070     * 0 => no fragmentation
1071     * 1 => high fragmentation
1072     */
1073    return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
1074
1075}
1076
1077static void unusable_show_print(struct seq_file *m,
1078                    pg_data_t *pgdat, struct zone *zone)
1079{
1080    unsigned int order;
1081    int index;
1082    struct contig_page_info info;
1083
1084    seq_printf(m, "Node %d, zone %8s ",
1085                pgdat->node_id,
1086                zone->name);
1087    for (order = 0; order < MAX_ORDER; ++order) {
1088        fill_contig_page_info(zone, order, &info);
1089        index = unusable_free_index(order, &info);
1090        seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1091    }
1092
1093    seq_putc(m, '\n');
1094}
1095
1096/*
1097 * Display unusable free space index
1098 *
1099 * The unusable free space index measures how much of the available free
1100 * memory cannot be used to satisfy an allocation of a given size and is a
1101 * value between 0 and 1. The higher the value, the more of free memory is
1102 * unusable and by implication, the worse the external fragmentation is. This
1103 * can be expressed as a percentage by multiplying by 100.
1104 */
1105static int unusable_show(struct seq_file *m, void *arg)
1106{
1107    pg_data_t *pgdat = (pg_data_t *)arg;
1108
1109    /* check memoryless node */
1110    if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
1111        return 0;
1112
1113    walk_zones_in_node(m, pgdat, unusable_show_print);
1114
1115    return 0;
1116}
1117
1118static const struct seq_operations unusable_op = {
1119    .start = frag_start,
1120    .next = frag_next,
1121    .stop = frag_stop,
1122    .show = unusable_show,
1123};
1124
1125static int unusable_open(struct inode *inode, struct file *file)
1126{
1127    return seq_open(file, &unusable_op);
1128}
1129
1130static const struct file_operations unusable_file_ops = {
1131    .open = unusable_open,
1132    .read = seq_read,
1133    .llseek = seq_lseek,
1134    .release = seq_release,
1135};
1136
1137static void extfrag_show_print(struct seq_file *m,
1138                    pg_data_t *pgdat, struct zone *zone)
1139{
1140    unsigned int order;
1141    int index;
1142
1143    /* Alloc on stack as interrupts are disabled for zone walk */
1144    struct contig_page_info info;
1145
1146    seq_printf(m, "Node %d, zone %8s ",
1147                pgdat->node_id,
1148                zone->name);
1149    for (order = 0; order < MAX_ORDER; ++order) {
1150        fill_contig_page_info(zone, order, &info);
1151        index = __fragmentation_index(order, &info);
1152        seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1153    }
1154
1155    seq_putc(m, '\n');
1156}
1157
1158/*
1159 * Display fragmentation index for orders that allocations would fail for
1160 */
1161static int extfrag_show(struct seq_file *m, void *arg)
1162{
1163    pg_data_t *pgdat = (pg_data_t *)arg;
1164
1165    walk_zones_in_node(m, pgdat, extfrag_show_print);
1166
1167    return 0;
1168}
1169
1170static const struct seq_operations extfrag_op = {
1171    .start = frag_start,
1172    .next = frag_next,
1173    .stop = frag_stop,
1174    .show = extfrag_show,
1175};
1176
1177static int extfrag_open(struct inode *inode, struct file *file)
1178{
1179    return seq_open(file, &extfrag_op);
1180}
1181
1182static const struct file_operations extfrag_file_ops = {
1183    .open = extfrag_open,
1184    .read = seq_read,
1185    .llseek = seq_lseek,
1186    .release = seq_release,
1187};
1188
1189static int __init extfrag_debug_init(void)
1190{
1191    extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
1192    if (!extfrag_debug_root)
1193        return -ENOMEM;
1194
1195    if (!debugfs_create_file("unusable_index", 0444,
1196            extfrag_debug_root, NULL, &unusable_file_ops))
1197        return -ENOMEM;
1198
1199    if (!debugfs_create_file("extfrag_index", 0444,
1200            extfrag_debug_root, NULL, &extfrag_file_ops))
1201        return -ENOMEM;
1202
1203    return 0;
1204}
1205
1206module_init(extfrag_debug_init);
1207#endif
1208

Archive Download this file



interactive