Root/kernel/sysctl.c

1/*
2 * sysctl.c: General linux system control interface
3 *
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12 * Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16 * Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */
20
21#include <linux/module.h>
22#include <linux/mm.h>
23#include <linux/swap.h>
24#include <linux/slab.h>
25#include <linux/sysctl.h>
26#include <linux/signal.h>
27#include <linux/printk.h>
28#include <linux/proc_fs.h>
29#include <linux/security.h>
30#include <linux/ctype.h>
31#include <linux/kmemcheck.h>
32#include <linux/fs.h>
33#include <linux/init.h>
34#include <linux/kernel.h>
35#include <linux/kobject.h>
36#include <linux/net.h>
37#include <linux/sysrq.h>
38#include <linux/highuid.h>
39#include <linux/writeback.h>
40#include <linux/ratelimit.h>
41#include <linux/compaction.h>
42#include <linux/hugetlb.h>
43#include <linux/initrd.h>
44#include <linux/key.h>
45#include <linux/times.h>
46#include <linux/limits.h>
47#include <linux/dcache.h>
48#include <linux/dnotify.h>
49#include <linux/syscalls.h>
50#include <linux/vmstat.h>
51#include <linux/nfs_fs.h>
52#include <linux/acpi.h>
53#include <linux/reboot.h>
54#include <linux/ftrace.h>
55#include <linux/perf_event.h>
56#include <linux/kprobes.h>
57#include <linux/pipe_fs_i.h>
58#include <linux/oom.h>
59
60#include <asm/uaccess.h>
61#include <asm/processor.h>
62
63#ifdef CONFIG_X86
64#include <asm/nmi.h>
65#include <asm/stacktrace.h>
66#include <asm/io.h>
67#endif
68#ifdef CONFIG_BSD_PROCESS_ACCT
69#include <linux/acct.h>
70#endif
71#ifdef CONFIG_RT_MUTEXES
72#include <linux/rtmutex.h>
73#endif
74#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
75#include <linux/lockdep.h>
76#endif
77#ifdef CONFIG_CHR_DEV_SG
78#include <scsi/sg.h>
79#endif
80
81#ifdef CONFIG_LOCKUP_DETECTOR
82#include <linux/nmi.h>
83#endif
84
85
86#if defined(CONFIG_SYSCTL)
87
88/* External variables not in a header file. */
89extern int sysctl_overcommit_memory;
90extern int sysctl_overcommit_ratio;
91extern int max_threads;
92extern int core_uses_pid;
93extern int suid_dumpable;
94extern char core_pattern[];
95extern unsigned int core_pipe_limit;
96extern int pid_max;
97extern int min_free_kbytes;
98extern int pid_max_min, pid_max_max;
99extern int sysctl_drop_caches;
100extern int percpu_pagelist_fraction;
101extern int compat_log;
102extern int latencytop_enabled;
103extern int sysctl_nr_open_min, sysctl_nr_open_max;
104#ifndef CONFIG_MMU
105extern int sysctl_nr_trim_pages;
106#endif
107#ifdef CONFIG_BLOCK
108extern int blk_iopoll_enabled;
109#endif
110
111/* Constants used for minimum and maximum */
112#ifdef CONFIG_LOCKUP_DETECTOR
113static int sixty = 60;
114static int neg_one = -1;
115#endif
116
117static int zero;
118static int __maybe_unused one = 1;
119static int __maybe_unused two = 2;
120static unsigned long one_ul = 1;
121static int one_hundred = 100;
122#ifdef CONFIG_PRINTK
123static int ten_thousand = 10000;
124#endif
125
126/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
127static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
128
129/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
130static int maxolduid = 65535;
131static int minolduid;
132static int min_percpu_pagelist_fract = 8;
133
134static int ngroups_max = NGROUPS_MAX;
135
136#ifdef CONFIG_INOTIFY_USER
137#include <linux/inotify.h>
138#endif
139#ifdef CONFIG_SPARC
140#include <asm/system.h>
141#endif
142
143#ifdef CONFIG_SPARC64
144extern int sysctl_tsb_ratio;
145#endif
146
147#ifdef __hppa__
148extern int pwrsw_enabled;
149extern int unaligned_enabled;
150#endif
151
152#ifdef CONFIG_S390
153#ifdef CONFIG_MATHEMU
154extern int sysctl_ieee_emulation_warnings;
155#endif
156extern int sysctl_userprocess_debug;
157extern int spin_retry;
158#endif
159
160#ifdef CONFIG_IA64
161extern int no_unaligned_warning;
162extern int unaligned_dump_stack;
163#endif
164
165#ifdef CONFIG_PROC_SYSCTL
166static int proc_do_cad_pid(struct ctl_table *table, int write,
167          void __user *buffer, size_t *lenp, loff_t *ppos);
168static int proc_taint(struct ctl_table *table, int write,
169                   void __user *buffer, size_t *lenp, loff_t *ppos);
170#endif
171
172#ifdef CONFIG_MAGIC_SYSRQ
173/* Note: sysrq code uses its own private copy */
174static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
175
176static int sysrq_sysctl_handler(ctl_table *table, int write,
177                void __user *buffer, size_t *lenp,
178                loff_t *ppos)
179{
180    int error;
181
182    error = proc_dointvec(table, write, buffer, lenp, ppos);
183    if (error)
184        return error;
185
186    if (write)
187        sysrq_toggle_support(__sysrq_enabled);
188
189    return 0;
190}
191
192#endif
193
194static struct ctl_table root_table[];
195static struct ctl_table_root sysctl_table_root;
196static struct ctl_table_header root_table_header = {
197    {{.count = 1,
198    .ctl_table = root_table,
199    .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
200    .root = &sysctl_table_root,
201    .set = &sysctl_table_root.default_set,
202};
203static struct ctl_table_root sysctl_table_root = {
204    .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
205    .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
206};
207
208static struct ctl_table kern_table[];
209static struct ctl_table vm_table[];
210static struct ctl_table fs_table[];
211static struct ctl_table debug_table[];
212static struct ctl_table dev_table[];
213extern struct ctl_table random_table[];
214#ifdef CONFIG_EPOLL
215extern struct ctl_table epoll_table[];
216#endif
217
218#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
219int sysctl_legacy_va_layout;
220#endif
221
222/* The default sysctl tables: */
223
224static struct ctl_table root_table[] = {
225    {
226        .procname = "kernel",
227        .mode = 0555,
228        .child = kern_table,
229    },
230    {
231        .procname = "vm",
232        .mode = 0555,
233        .child = vm_table,
234    },
235    {
236        .procname = "fs",
237        .mode = 0555,
238        .child = fs_table,
239    },
240    {
241        .procname = "debug",
242        .mode = 0555,
243        .child = debug_table,
244    },
245    {
246        .procname = "dev",
247        .mode = 0555,
248        .child = dev_table,
249    },
250    { }
251};
252
253#ifdef CONFIG_SCHED_DEBUG
254static int min_sched_granularity_ns = 100000; /* 100 usecs */
255static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
256static int min_wakeup_granularity_ns; /* 0 usecs */
257static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
258static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
259static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
260#endif
261
262#ifdef CONFIG_COMPACTION
263static int min_extfrag_threshold;
264static int max_extfrag_threshold = 1000;
265#endif
266
267static struct ctl_table kern_table[] = {
268    {
269        .procname = "sched_child_runs_first",
270        .data = &sysctl_sched_child_runs_first,
271        .maxlen = sizeof(unsigned int),
272        .mode = 0644,
273        .proc_handler = proc_dointvec,
274    },
275#ifdef CONFIG_SCHED_DEBUG
276    {
277        .procname = "sched_min_granularity_ns",
278        .data = &sysctl_sched_min_granularity,
279        .maxlen = sizeof(unsigned int),
280        .mode = 0644,
281        .proc_handler = sched_proc_update_handler,
282        .extra1 = &min_sched_granularity_ns,
283        .extra2 = &max_sched_granularity_ns,
284    },
285    {
286        .procname = "sched_latency_ns",
287        .data = &sysctl_sched_latency,
288        .maxlen = sizeof(unsigned int),
289        .mode = 0644,
290        .proc_handler = sched_proc_update_handler,
291        .extra1 = &min_sched_granularity_ns,
292        .extra2 = &max_sched_granularity_ns,
293    },
294    {
295        .procname = "sched_wakeup_granularity_ns",
296        .data = &sysctl_sched_wakeup_granularity,
297        .maxlen = sizeof(unsigned int),
298        .mode = 0644,
299        .proc_handler = sched_proc_update_handler,
300        .extra1 = &min_wakeup_granularity_ns,
301        .extra2 = &max_wakeup_granularity_ns,
302    },
303    {
304        .procname = "sched_tunable_scaling",
305        .data = &sysctl_sched_tunable_scaling,
306        .maxlen = sizeof(enum sched_tunable_scaling),
307        .mode = 0644,
308        .proc_handler = sched_proc_update_handler,
309        .extra1 = &min_sched_tunable_scaling,
310        .extra2 = &max_sched_tunable_scaling,
311    },
312    {
313        .procname = "sched_migration_cost",
314        .data = &sysctl_sched_migration_cost,
315        .maxlen = sizeof(unsigned int),
316        .mode = 0644,
317        .proc_handler = proc_dointvec,
318    },
319    {
320        .procname = "sched_nr_migrate",
321        .data = &sysctl_sched_nr_migrate,
322        .maxlen = sizeof(unsigned int),
323        .mode = 0644,
324        .proc_handler = proc_dointvec,
325    },
326    {
327        .procname = "sched_time_avg",
328        .data = &sysctl_sched_time_avg,
329        .maxlen = sizeof(unsigned int),
330        .mode = 0644,
331        .proc_handler = proc_dointvec,
332    },
333    {
334        .procname = "sched_shares_window",
335        .data = &sysctl_sched_shares_window,
336        .maxlen = sizeof(unsigned int),
337        .mode = 0644,
338        .proc_handler = proc_dointvec,
339    },
340    {
341        .procname = "timer_migration",
342        .data = &sysctl_timer_migration,
343        .maxlen = sizeof(unsigned int),
344        .mode = 0644,
345        .proc_handler = proc_dointvec_minmax,
346        .extra1 = &zero,
347        .extra2 = &one,
348    },
349#endif
350    {
351        .procname = "sched_rt_period_us",
352        .data = &sysctl_sched_rt_period,
353        .maxlen = sizeof(unsigned int),
354        .mode = 0644,
355        .proc_handler = sched_rt_handler,
356    },
357    {
358        .procname = "sched_rt_runtime_us",
359        .data = &sysctl_sched_rt_runtime,
360        .maxlen = sizeof(int),
361        .mode = 0644,
362        .proc_handler = sched_rt_handler,
363    },
364    {
365        .procname = "sched_compat_yield",
366        .data = &sysctl_sched_compat_yield,
367        .maxlen = sizeof(unsigned int),
368        .mode = 0644,
369        .proc_handler = proc_dointvec,
370    },
371#ifdef CONFIG_SCHED_AUTOGROUP
372    {
373        .procname = "sched_autogroup_enabled",
374        .data = &sysctl_sched_autogroup_enabled,
375        .maxlen = sizeof(unsigned int),
376        .mode = 0644,
377        .proc_handler = proc_dointvec,
378        .extra1 = &zero,
379        .extra2 = &one,
380    },
381#endif
382#ifdef CONFIG_PROVE_LOCKING
383    {
384        .procname = "prove_locking",
385        .data = &prove_locking,
386        .maxlen = sizeof(int),
387        .mode = 0644,
388        .proc_handler = proc_dointvec,
389    },
390#endif
391#ifdef CONFIG_LOCK_STAT
392    {
393        .procname = "lock_stat",
394        .data = &lock_stat,
395        .maxlen = sizeof(int),
396        .mode = 0644,
397        .proc_handler = proc_dointvec,
398    },
399#endif
400    {
401        .procname = "panic",
402        .data = &panic_timeout,
403        .maxlen = sizeof(int),
404        .mode = 0644,
405        .proc_handler = proc_dointvec,
406    },
407    {
408        .procname = "core_uses_pid",
409        .data = &core_uses_pid,
410        .maxlen = sizeof(int),
411        .mode = 0644,
412        .proc_handler = proc_dointvec,
413    },
414    {
415        .procname = "core_pattern",
416        .data = core_pattern,
417        .maxlen = CORENAME_MAX_SIZE,
418        .mode = 0644,
419        .proc_handler = proc_dostring,
420    },
421    {
422        .procname = "core_pipe_limit",
423        .data = &core_pipe_limit,
424        .maxlen = sizeof(unsigned int),
425        .mode = 0644,
426        .proc_handler = proc_dointvec,
427    },
428#ifdef CONFIG_PROC_SYSCTL
429    {
430        .procname = "tainted",
431        .maxlen = sizeof(long),
432        .mode = 0644,
433        .proc_handler = proc_taint,
434    },
435#endif
436#ifdef CONFIG_LATENCYTOP
437    {
438        .procname = "latencytop",
439        .data = &latencytop_enabled,
440        .maxlen = sizeof(int),
441        .mode = 0644,
442        .proc_handler = proc_dointvec,
443    },
444#endif
445#ifdef CONFIG_BLK_DEV_INITRD
446    {
447        .procname = "real-root-dev",
448        .data = &real_root_dev,
449        .maxlen = sizeof(int),
450        .mode = 0644,
451        .proc_handler = proc_dointvec,
452    },
453#endif
454    {
455        .procname = "print-fatal-signals",
456        .data = &print_fatal_signals,
457        .maxlen = sizeof(int),
458        .mode = 0644,
459        .proc_handler = proc_dointvec,
460    },
461#ifdef CONFIG_SPARC
462    {
463        .procname = "reboot-cmd",
464        .data = reboot_command,
465        .maxlen = 256,
466        .mode = 0644,
467        .proc_handler = proc_dostring,
468    },
469    {
470        .procname = "stop-a",
471        .data = &stop_a_enabled,
472        .maxlen = sizeof (int),
473        .mode = 0644,
474        .proc_handler = proc_dointvec,
475    },
476    {
477        .procname = "scons-poweroff",
478        .data = &scons_pwroff,
479        .maxlen = sizeof (int),
480        .mode = 0644,
481        .proc_handler = proc_dointvec,
482    },
483#endif
484#ifdef CONFIG_SPARC64
485    {
486        .procname = "tsb-ratio",
487        .data = &sysctl_tsb_ratio,
488        .maxlen = sizeof (int),
489        .mode = 0644,
490        .proc_handler = proc_dointvec,
491    },
492#endif
493#ifdef __hppa__
494    {
495        .procname = "soft-power",
496        .data = &pwrsw_enabled,
497        .maxlen = sizeof (int),
498         .mode = 0644,
499        .proc_handler = proc_dointvec,
500    },
501    {
502        .procname = "unaligned-trap",
503        .data = &unaligned_enabled,
504        .maxlen = sizeof (int),
505        .mode = 0644,
506        .proc_handler = proc_dointvec,
507    },
508#endif
509    {
510        .procname = "ctrl-alt-del",
511        .data = &C_A_D,
512        .maxlen = sizeof(int),
513        .mode = 0644,
514        .proc_handler = proc_dointvec,
515    },
516#ifdef CONFIG_FUNCTION_TRACER
517    {
518        .procname = "ftrace_enabled",
519        .data = &ftrace_enabled,
520        .maxlen = sizeof(int),
521        .mode = 0644,
522        .proc_handler = ftrace_enable_sysctl,
523    },
524#endif
525#ifdef CONFIG_STACK_TRACER
526    {
527        .procname = "stack_tracer_enabled",
528        .data = &stack_tracer_enabled,
529        .maxlen = sizeof(int),
530        .mode = 0644,
531        .proc_handler = stack_trace_sysctl,
532    },
533#endif
534#ifdef CONFIG_TRACING
535    {
536        .procname = "ftrace_dump_on_oops",
537        .data = &ftrace_dump_on_oops,
538        .maxlen = sizeof(int),
539        .mode = 0644,
540        .proc_handler = proc_dointvec,
541    },
542#endif
543#ifdef CONFIG_MODULES
544    {
545        .procname = "modprobe",
546        .data = &modprobe_path,
547        .maxlen = KMOD_PATH_LEN,
548        .mode = 0644,
549        .proc_handler = proc_dostring,
550    },
551    {
552        .procname = "modules_disabled",
553        .data = &modules_disabled,
554        .maxlen = sizeof(int),
555        .mode = 0644,
556        /* only handle a transition from default "0" to "1" */
557        .proc_handler = proc_dointvec_minmax,
558        .extra1 = &one,
559        .extra2 = &one,
560    },
561#endif
562#ifdef CONFIG_HOTPLUG
563    {
564        .procname = "hotplug",
565        .data = &uevent_helper,
566        .maxlen = UEVENT_HELPER_PATH_LEN,
567        .mode = 0644,
568        .proc_handler = proc_dostring,
569    },
570#endif
571#ifdef CONFIG_CHR_DEV_SG
572    {
573        .procname = "sg-big-buff",
574        .data = &sg_big_buff,
575        .maxlen = sizeof (int),
576        .mode = 0444,
577        .proc_handler = proc_dointvec,
578    },
579#endif
580#ifdef CONFIG_BSD_PROCESS_ACCT
581    {
582        .procname = "acct",
583        .data = &acct_parm,
584        .maxlen = 3*sizeof(int),
585        .mode = 0644,
586        .proc_handler = proc_dointvec,
587    },
588#endif
589#ifdef CONFIG_MAGIC_SYSRQ
590    {
591        .procname = "sysrq",
592        .data = &__sysrq_enabled,
593        .maxlen = sizeof (int),
594        .mode = 0644,
595        .proc_handler = sysrq_sysctl_handler,
596    },
597#endif
598#ifdef CONFIG_PROC_SYSCTL
599    {
600        .procname = "cad_pid",
601        .data = NULL,
602        .maxlen = sizeof (int),
603        .mode = 0600,
604        .proc_handler = proc_do_cad_pid,
605    },
606#endif
607    {
608        .procname = "threads-max",
609        .data = &max_threads,
610        .maxlen = sizeof(int),
611        .mode = 0644,
612        .proc_handler = proc_dointvec,
613    },
614    {
615        .procname = "random",
616        .mode = 0555,
617        .child = random_table,
618    },
619    {
620        .procname = "overflowuid",
621        .data = &overflowuid,
622        .maxlen = sizeof(int),
623        .mode = 0644,
624        .proc_handler = proc_dointvec_minmax,
625        .extra1 = &minolduid,
626        .extra2 = &maxolduid,
627    },
628    {
629        .procname = "overflowgid",
630        .data = &overflowgid,
631        .maxlen = sizeof(int),
632        .mode = 0644,
633        .proc_handler = proc_dointvec_minmax,
634        .extra1 = &minolduid,
635        .extra2 = &maxolduid,
636    },
637#ifdef CONFIG_S390
638#ifdef CONFIG_MATHEMU
639    {
640        .procname = "ieee_emulation_warnings",
641        .data = &sysctl_ieee_emulation_warnings,
642        .maxlen = sizeof(int),
643        .mode = 0644,
644        .proc_handler = proc_dointvec,
645    },
646#endif
647    {
648        .procname = "userprocess_debug",
649        .data = &show_unhandled_signals,
650        .maxlen = sizeof(int),
651        .mode = 0644,
652        .proc_handler = proc_dointvec,
653    },
654#endif
655    {
656        .procname = "pid_max",
657        .data = &pid_max,
658        .maxlen = sizeof (int),
659        .mode = 0644,
660        .proc_handler = proc_dointvec_minmax,
661        .extra1 = &pid_max_min,
662        .extra2 = &pid_max_max,
663    },
664    {
665        .procname = "panic_on_oops",
666        .data = &panic_on_oops,
667        .maxlen = sizeof(int),
668        .mode = 0644,
669        .proc_handler = proc_dointvec,
670    },
671#if defined CONFIG_PRINTK
672    {
673        .procname = "printk",
674        .data = &console_loglevel,
675        .maxlen = 4*sizeof(int),
676        .mode = 0644,
677        .proc_handler = proc_dointvec,
678    },
679    {
680        .procname = "printk_ratelimit",
681        .data = &printk_ratelimit_state.interval,
682        .maxlen = sizeof(int),
683        .mode = 0644,
684        .proc_handler = proc_dointvec_jiffies,
685    },
686    {
687        .procname = "printk_ratelimit_burst",
688        .data = &printk_ratelimit_state.burst,
689        .maxlen = sizeof(int),
690        .mode = 0644,
691        .proc_handler = proc_dointvec,
692    },
693    {
694        .procname = "printk_delay",
695        .data = &printk_delay_msec,
696        .maxlen = sizeof(int),
697        .mode = 0644,
698        .proc_handler = proc_dointvec_minmax,
699        .extra1 = &zero,
700        .extra2 = &ten_thousand,
701    },
702    {
703        .procname = "dmesg_restrict",
704        .data = &dmesg_restrict,
705        .maxlen = sizeof(int),
706        .mode = 0644,
707        .proc_handler = proc_dointvec_minmax,
708        .extra1 = &zero,
709        .extra2 = &one,
710    },
711    {
712        .procname = "kptr_restrict",
713        .data = &kptr_restrict,
714        .maxlen = sizeof(int),
715        .mode = 0644,
716        .proc_handler = proc_dointvec_minmax,
717        .extra1 = &zero,
718        .extra2 = &two,
719    },
720#endif
721    {
722        .procname = "ngroups_max",
723        .data = &ngroups_max,
724        .maxlen = sizeof (int),
725        .mode = 0444,
726        .proc_handler = proc_dointvec,
727    },
728#if defined(CONFIG_LOCKUP_DETECTOR)
729    {
730        .procname = "watchdog",
731        .data = &watchdog_enabled,
732        .maxlen = sizeof (int),
733        .mode = 0644,
734        .proc_handler = proc_dowatchdog_enabled,
735    },
736    {
737        .procname = "watchdog_thresh",
738        .data = &softlockup_thresh,
739        .maxlen = sizeof(int),
740        .mode = 0644,
741        .proc_handler = proc_dowatchdog_thresh,
742        .extra1 = &neg_one,
743        .extra2 = &sixty,
744    },
745    {
746        .procname = "softlockup_panic",
747        .data = &softlockup_panic,
748        .maxlen = sizeof(int),
749        .mode = 0644,
750        .proc_handler = proc_dointvec_minmax,
751        .extra1 = &zero,
752        .extra2 = &one,
753    },
754    {
755        .procname = "nmi_watchdog",
756        .data = &watchdog_enabled,
757        .maxlen = sizeof (int),
758        .mode = 0644,
759        .proc_handler = proc_dowatchdog_enabled,
760    },
761#endif
762#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
763    {
764        .procname = "unknown_nmi_panic",
765        .data = &unknown_nmi_panic,
766        .maxlen = sizeof (int),
767        .mode = 0644,
768        .proc_handler = proc_dointvec,
769    },
770#endif
771#if defined(CONFIG_X86)
772    {
773        .procname = "panic_on_unrecovered_nmi",
774        .data = &panic_on_unrecovered_nmi,
775        .maxlen = sizeof(int),
776        .mode = 0644,
777        .proc_handler = proc_dointvec,
778    },
779    {
780        .procname = "panic_on_io_nmi",
781        .data = &panic_on_io_nmi,
782        .maxlen = sizeof(int),
783        .mode = 0644,
784        .proc_handler = proc_dointvec,
785    },
786    {
787        .procname = "bootloader_type",
788        .data = &bootloader_type,
789        .maxlen = sizeof (int),
790        .mode = 0444,
791        .proc_handler = proc_dointvec,
792    },
793    {
794        .procname = "bootloader_version",
795        .data = &bootloader_version,
796        .maxlen = sizeof (int),
797        .mode = 0444,
798        .proc_handler = proc_dointvec,
799    },
800    {
801        .procname = "kstack_depth_to_print",
802        .data = &kstack_depth_to_print,
803        .maxlen = sizeof(int),
804        .mode = 0644,
805        .proc_handler = proc_dointvec,
806    },
807    {
808        .procname = "io_delay_type",
809        .data = &io_delay_type,
810        .maxlen = sizeof(int),
811        .mode = 0644,
812        .proc_handler = proc_dointvec,
813    },
814#endif
815#if defined(CONFIG_MMU)
816    {
817        .procname = "randomize_va_space",
818        .data = &randomize_va_space,
819        .maxlen = sizeof(int),
820        .mode = 0644,
821        .proc_handler = proc_dointvec,
822    },
823#endif
824#if defined(CONFIG_S390) && defined(CONFIG_SMP)
825    {
826        .procname = "spin_retry",
827        .data = &spin_retry,
828        .maxlen = sizeof (int),
829        .mode = 0644,
830        .proc_handler = proc_dointvec,
831    },
832#endif
833#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
834    {
835        .procname = "acpi_video_flags",
836        .data = &acpi_realmode_flags,
837        .maxlen = sizeof (unsigned long),
838        .mode = 0644,
839        .proc_handler = proc_doulongvec_minmax,
840    },
841#endif
842#ifdef CONFIG_IA64
843    {
844        .procname = "ignore-unaligned-usertrap",
845        .data = &no_unaligned_warning,
846        .maxlen = sizeof (int),
847         .mode = 0644,
848        .proc_handler = proc_dointvec,
849    },
850    {
851        .procname = "unaligned-dump-stack",
852        .data = &unaligned_dump_stack,
853        .maxlen = sizeof (int),
854        .mode = 0644,
855        .proc_handler = proc_dointvec,
856    },
857#endif
858#ifdef CONFIG_DETECT_HUNG_TASK
859    {
860        .procname = "hung_task_panic",
861        .data = &sysctl_hung_task_panic,
862        .maxlen = sizeof(int),
863        .mode = 0644,
864        .proc_handler = proc_dointvec_minmax,
865        .extra1 = &zero,
866        .extra2 = &one,
867    },
868    {
869        .procname = "hung_task_check_count",
870        .data = &sysctl_hung_task_check_count,
871        .maxlen = sizeof(unsigned long),
872        .mode = 0644,
873        .proc_handler = proc_doulongvec_minmax,
874    },
875    {
876        .procname = "hung_task_timeout_secs",
877        .data = &sysctl_hung_task_timeout_secs,
878        .maxlen = sizeof(unsigned long),
879        .mode = 0644,
880        .proc_handler = proc_dohung_task_timeout_secs,
881    },
882    {
883        .procname = "hung_task_warnings",
884        .data = &sysctl_hung_task_warnings,
885        .maxlen = sizeof(unsigned long),
886        .mode = 0644,
887        .proc_handler = proc_doulongvec_minmax,
888    },
889#endif
890#ifdef CONFIG_COMPAT
891    {
892        .procname = "compat-log",
893        .data = &compat_log,
894        .maxlen = sizeof (int),
895         .mode = 0644,
896        .proc_handler = proc_dointvec,
897    },
898#endif
899#ifdef CONFIG_RT_MUTEXES
900    {
901        .procname = "max_lock_depth",
902        .data = &max_lock_depth,
903        .maxlen = sizeof(int),
904        .mode = 0644,
905        .proc_handler = proc_dointvec,
906    },
907#endif
908    {
909        .procname = "poweroff_cmd",
910        .data = &poweroff_cmd,
911        .maxlen = POWEROFF_CMD_PATH_LEN,
912        .mode = 0644,
913        .proc_handler = proc_dostring,
914    },
915#ifdef CONFIG_KEYS
916    {
917        .procname = "keys",
918        .mode = 0555,
919        .child = key_sysctls,
920    },
921#endif
922#ifdef CONFIG_RCU_TORTURE_TEST
923    {
924        .procname = "rcutorture_runnable",
925        .data = &rcutorture_runnable,
926        .maxlen = sizeof(int),
927        .mode = 0644,
928        .proc_handler = proc_dointvec,
929    },
930#endif
931#ifdef CONFIG_PERF_EVENTS
932    {
933        .procname = "perf_event_paranoid",
934        .data = &sysctl_perf_event_paranoid,
935        .maxlen = sizeof(sysctl_perf_event_paranoid),
936        .mode = 0644,
937        .proc_handler = proc_dointvec,
938    },
939    {
940        .procname = "perf_event_mlock_kb",
941        .data = &sysctl_perf_event_mlock,
942        .maxlen = sizeof(sysctl_perf_event_mlock),
943        .mode = 0644,
944        .proc_handler = proc_dointvec,
945    },
946    {
947        .procname = "perf_event_max_sample_rate",
948        .data = &sysctl_perf_event_sample_rate,
949        .maxlen = sizeof(sysctl_perf_event_sample_rate),
950        .mode = 0644,
951        .proc_handler = proc_dointvec,
952    },
953#endif
954#ifdef CONFIG_KMEMCHECK
955    {
956        .procname = "kmemcheck",
957        .data = &kmemcheck_enabled,
958        .maxlen = sizeof(int),
959        .mode = 0644,
960        .proc_handler = proc_dointvec,
961    },
962#endif
963#ifdef CONFIG_BLOCK
964    {
965        .procname = "blk_iopoll",
966        .data = &blk_iopoll_enabled,
967        .maxlen = sizeof(int),
968        .mode = 0644,
969        .proc_handler = proc_dointvec,
970    },
971#endif
972    { }
973};
974
975static struct ctl_table vm_table[] = {
976    {
977        .procname = "overcommit_memory",
978        .data = &sysctl_overcommit_memory,
979        .maxlen = sizeof(sysctl_overcommit_memory),
980        .mode = 0644,
981        .proc_handler = proc_dointvec,
982    },
983    {
984        .procname = "panic_on_oom",
985        .data = &sysctl_panic_on_oom,
986        .maxlen = sizeof(sysctl_panic_on_oom),
987        .mode = 0644,
988        .proc_handler = proc_dointvec,
989    },
990    {
991        .procname = "oom_kill_allocating_task",
992        .data = &sysctl_oom_kill_allocating_task,
993        .maxlen = sizeof(sysctl_oom_kill_allocating_task),
994        .mode = 0644,
995        .proc_handler = proc_dointvec,
996    },
997    {
998        .procname = "oom_dump_tasks",
999        .data = &sysctl_oom_dump_tasks,
1000        .maxlen = sizeof(sysctl_oom_dump_tasks),
1001        .mode = 0644,
1002        .proc_handler = proc_dointvec,
1003    },
1004    {
1005        .procname = "overcommit_ratio",
1006        .data = &sysctl_overcommit_ratio,
1007        .maxlen = sizeof(sysctl_overcommit_ratio),
1008        .mode = 0644,
1009        .proc_handler = proc_dointvec,
1010    },
1011    {
1012        .procname = "page-cluster",
1013        .data = &page_cluster,
1014        .maxlen = sizeof(int),
1015        .mode = 0644,
1016        .proc_handler = proc_dointvec,
1017    },
1018    {
1019        .procname = "dirty_background_ratio",
1020        .data = &dirty_background_ratio,
1021        .maxlen = sizeof(dirty_background_ratio),
1022        .mode = 0644,
1023        .proc_handler = dirty_background_ratio_handler,
1024        .extra1 = &zero,
1025        .extra2 = &one_hundred,
1026    },
1027    {
1028        .procname = "dirty_background_bytes",
1029        .data = &dirty_background_bytes,
1030        .maxlen = sizeof(dirty_background_bytes),
1031        .mode = 0644,
1032        .proc_handler = dirty_background_bytes_handler,
1033        .extra1 = &one_ul,
1034    },
1035    {
1036        .procname = "dirty_ratio",
1037        .data = &vm_dirty_ratio,
1038        .maxlen = sizeof(vm_dirty_ratio),
1039        .mode = 0644,
1040        .proc_handler = dirty_ratio_handler,
1041        .extra1 = &zero,
1042        .extra2 = &one_hundred,
1043    },
1044    {
1045        .procname = "dirty_bytes",
1046        .data = &vm_dirty_bytes,
1047        .maxlen = sizeof(vm_dirty_bytes),
1048        .mode = 0644,
1049        .proc_handler = dirty_bytes_handler,
1050        .extra1 = &dirty_bytes_min,
1051    },
1052    {
1053        .procname = "dirty_writeback_centisecs",
1054        .data = &dirty_writeback_interval,
1055        .maxlen = sizeof(dirty_writeback_interval),
1056        .mode = 0644,
1057        .proc_handler = dirty_writeback_centisecs_handler,
1058    },
1059    {
1060        .procname = "dirty_expire_centisecs",
1061        .data = &dirty_expire_interval,
1062        .maxlen = sizeof(dirty_expire_interval),
1063        .mode = 0644,
1064        .proc_handler = proc_dointvec,
1065    },
1066    {
1067        .procname = "nr_pdflush_threads",
1068        .data = &nr_pdflush_threads,
1069        .maxlen = sizeof nr_pdflush_threads,
1070        .mode = 0444 /* read-only*/,
1071        .proc_handler = proc_dointvec,
1072    },
1073    {
1074        .procname = "swappiness",
1075        .data = &vm_swappiness,
1076        .maxlen = sizeof(vm_swappiness),
1077        .mode = 0644,
1078        .proc_handler = proc_dointvec_minmax,
1079        .extra1 = &zero,
1080        .extra2 = &one_hundred,
1081    },
1082#ifdef CONFIG_HUGETLB_PAGE
1083    {
1084        .procname = "nr_hugepages",
1085        .data = NULL,
1086        .maxlen = sizeof(unsigned long),
1087        .mode = 0644,
1088        .proc_handler = hugetlb_sysctl_handler,
1089        .extra1 = (void *)&hugetlb_zero,
1090        .extra2 = (void *)&hugetlb_infinity,
1091    },
1092#ifdef CONFIG_NUMA
1093    {
1094        .procname = "nr_hugepages_mempolicy",
1095        .data = NULL,
1096        .maxlen = sizeof(unsigned long),
1097        .mode = 0644,
1098        .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1099        .extra1 = (void *)&hugetlb_zero,
1100        .extra2 = (void *)&hugetlb_infinity,
1101    },
1102#endif
1103     {
1104        .procname = "hugetlb_shm_group",
1105        .data = &sysctl_hugetlb_shm_group,
1106        .maxlen = sizeof(gid_t),
1107        .mode = 0644,
1108        .proc_handler = proc_dointvec,
1109     },
1110     {
1111        .procname = "hugepages_treat_as_movable",
1112        .data = &hugepages_treat_as_movable,
1113        .maxlen = sizeof(int),
1114        .mode = 0644,
1115        .proc_handler = hugetlb_treat_movable_handler,
1116    },
1117    {
1118        .procname = "nr_overcommit_hugepages",
1119        .data = NULL,
1120        .maxlen = sizeof(unsigned long),
1121        .mode = 0644,
1122        .proc_handler = hugetlb_overcommit_handler,
1123        .extra1 = (void *)&hugetlb_zero,
1124        .extra2 = (void *)&hugetlb_infinity,
1125    },
1126#endif
1127    {
1128        .procname = "lowmem_reserve_ratio",
1129        .data = &sysctl_lowmem_reserve_ratio,
1130        .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
1131        .mode = 0644,
1132        .proc_handler = lowmem_reserve_ratio_sysctl_handler,
1133    },
1134    {
1135        .procname = "drop_caches",
1136        .data = &sysctl_drop_caches,
1137        .maxlen = sizeof(int),
1138        .mode = 0644,
1139        .proc_handler = drop_caches_sysctl_handler,
1140    },
1141#ifdef CONFIG_COMPACTION
1142    {
1143        .procname = "compact_memory",
1144        .data = &sysctl_compact_memory,
1145        .maxlen = sizeof(int),
1146        .mode = 0200,
1147        .proc_handler = sysctl_compaction_handler,
1148    },
1149    {
1150        .procname = "extfrag_threshold",
1151        .data = &sysctl_extfrag_threshold,
1152        .maxlen = sizeof(int),
1153        .mode = 0644,
1154        .proc_handler = sysctl_extfrag_handler,
1155        .extra1 = &min_extfrag_threshold,
1156        .extra2 = &max_extfrag_threshold,
1157    },
1158
1159#endif /* CONFIG_COMPACTION */
1160    {
1161        .procname = "min_free_kbytes",
1162        .data = &min_free_kbytes,
1163        .maxlen = sizeof(min_free_kbytes),
1164        .mode = 0644,
1165        .proc_handler = min_free_kbytes_sysctl_handler,
1166        .extra1 = &zero,
1167    },
1168    {
1169        .procname = "percpu_pagelist_fraction",
1170        .data = &percpu_pagelist_fraction,
1171        .maxlen = sizeof(percpu_pagelist_fraction),
1172        .mode = 0644,
1173        .proc_handler = percpu_pagelist_fraction_sysctl_handler,
1174        .extra1 = &min_percpu_pagelist_fract,
1175    },
1176#ifdef CONFIG_MMU
1177    {
1178        .procname = "max_map_count",
1179        .data = &sysctl_max_map_count,
1180        .maxlen = sizeof(sysctl_max_map_count),
1181        .mode = 0644,
1182        .proc_handler = proc_dointvec_minmax,
1183        .extra1 = &zero,
1184    },
1185#else
1186    {
1187        .procname = "nr_trim_pages",
1188        .data = &sysctl_nr_trim_pages,
1189        .maxlen = sizeof(sysctl_nr_trim_pages),
1190        .mode = 0644,
1191        .proc_handler = proc_dointvec_minmax,
1192        .extra1 = &zero,
1193    },
1194#endif
1195    {
1196        .procname = "laptop_mode",
1197        .data = &laptop_mode,
1198        .maxlen = sizeof(laptop_mode),
1199        .mode = 0644,
1200        .proc_handler = proc_dointvec_jiffies,
1201    },
1202    {
1203        .procname = "block_dump",
1204        .data = &block_dump,
1205        .maxlen = sizeof(block_dump),
1206        .mode = 0644,
1207        .proc_handler = proc_dointvec,
1208        .extra1 = &zero,
1209    },
1210    {
1211        .procname = "vfs_cache_pressure",
1212        .data = &sysctl_vfs_cache_pressure,
1213        .maxlen = sizeof(sysctl_vfs_cache_pressure),
1214        .mode = 0644,
1215        .proc_handler = proc_dointvec,
1216        .extra1 = &zero,
1217    },
1218#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1219    {
1220        .procname = "legacy_va_layout",
1221        .data = &sysctl_legacy_va_layout,
1222        .maxlen = sizeof(sysctl_legacy_va_layout),
1223        .mode = 0644,
1224        .proc_handler = proc_dointvec,
1225        .extra1 = &zero,
1226    },
1227#endif
1228#ifdef CONFIG_NUMA
1229    {
1230        .procname = "zone_reclaim_mode",
1231        .data = &zone_reclaim_mode,
1232        .maxlen = sizeof(zone_reclaim_mode),
1233        .mode = 0644,
1234        .proc_handler = proc_dointvec,
1235        .extra1 = &zero,
1236    },
1237    {
1238        .procname = "min_unmapped_ratio",
1239        .data = &sysctl_min_unmapped_ratio,
1240        .maxlen = sizeof(sysctl_min_unmapped_ratio),
1241        .mode = 0644,
1242        .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
1243        .extra1 = &zero,
1244        .extra2 = &one_hundred,
1245    },
1246    {
1247        .procname = "min_slab_ratio",
1248        .data = &sysctl_min_slab_ratio,
1249        .maxlen = sizeof(sysctl_min_slab_ratio),
1250        .mode = 0644,
1251        .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
1252        .extra1 = &zero,
1253        .extra2 = &one_hundred,
1254    },
1255#endif
1256#ifdef CONFIG_SMP
1257    {
1258        .procname = "stat_interval",
1259        .data = &sysctl_stat_interval,
1260        .maxlen = sizeof(sysctl_stat_interval),
1261        .mode = 0644,
1262        .proc_handler = proc_dointvec_jiffies,
1263    },
1264#endif
1265#ifdef CONFIG_MMU
1266    {
1267        .procname = "mmap_min_addr",
1268        .data = &dac_mmap_min_addr,
1269        .maxlen = sizeof(unsigned long),
1270        .mode = 0644,
1271        .proc_handler = mmap_min_addr_handler,
1272    },
1273#endif
1274#ifdef CONFIG_NUMA
1275    {
1276        .procname = "numa_zonelist_order",
1277        .data = &numa_zonelist_order,
1278        .maxlen = NUMA_ZONELIST_ORDER_LEN,
1279        .mode = 0644,
1280        .proc_handler = numa_zonelist_order_handler,
1281    },
1282#endif
1283#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1284   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1285    {
1286        .procname = "vdso_enabled",
1287        .data = &vdso_enabled,
1288        .maxlen = sizeof(vdso_enabled),
1289        .mode = 0644,
1290        .proc_handler = proc_dointvec,
1291        .extra1 = &zero,
1292    },
1293#endif
1294#ifdef CONFIG_HIGHMEM
1295    {
1296        .procname = "highmem_is_dirtyable",
1297        .data = &vm_highmem_is_dirtyable,
1298        .maxlen = sizeof(vm_highmem_is_dirtyable),
1299        .mode = 0644,
1300        .proc_handler = proc_dointvec_minmax,
1301        .extra1 = &zero,
1302        .extra2 = &one,
1303    },
1304#endif
1305    {
1306        .procname = "scan_unevictable_pages",
1307        .data = &scan_unevictable_pages,
1308        .maxlen = sizeof(scan_unevictable_pages),
1309        .mode = 0644,
1310        .proc_handler = scan_unevictable_handler,
1311    },
1312#ifdef CONFIG_MEMORY_FAILURE
1313    {
1314        .procname = "memory_failure_early_kill",
1315        .data = &sysctl_memory_failure_early_kill,
1316        .maxlen = sizeof(sysctl_memory_failure_early_kill),
1317        .mode = 0644,
1318        .proc_handler = proc_dointvec_minmax,
1319        .extra1 = &zero,
1320        .extra2 = &one,
1321    },
1322    {
1323        .procname = "memory_failure_recovery",
1324        .data = &sysctl_memory_failure_recovery,
1325        .maxlen = sizeof(sysctl_memory_failure_recovery),
1326        .mode = 0644,
1327        .proc_handler = proc_dointvec_minmax,
1328        .extra1 = &zero,
1329        .extra2 = &one,
1330    },
1331#endif
1332    { }
1333};
1334
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Child table referenced by the "binfmt_misc" directory entry of fs_table.
 * It holds nothing but the empty terminating entry.
 */
static struct ctl_table binfmt_misc_table[] = {
    { }
};
#endif
1340
1341static struct ctl_table fs_table[] = {
1342    {
1343        .procname = "inode-nr",
1344        .data = &inodes_stat,
1345        .maxlen = 2*sizeof(int),
1346        .mode = 0444,
1347        .proc_handler = proc_nr_inodes,
1348    },
1349    {
1350        .procname = "inode-state",
1351        .data = &inodes_stat,
1352        .maxlen = 7*sizeof(int),
1353        .mode = 0444,
1354        .proc_handler = proc_nr_inodes,
1355    },
1356    {
1357        .procname = "file-nr",
1358        .data = &files_stat,
1359        .maxlen = sizeof(files_stat),
1360        .mode = 0444,
1361        .proc_handler = proc_nr_files,
1362    },
1363    {
1364        .procname = "file-max",
1365        .data = &files_stat.max_files,
1366        .maxlen = sizeof(files_stat.max_files),
1367        .mode = 0644,
1368        .proc_handler = proc_doulongvec_minmax,
1369    },
1370    {
1371        .procname = "nr_open",
1372        .data = &sysctl_nr_open,
1373        .maxlen = sizeof(int),
1374        .mode = 0644,
1375        .proc_handler = proc_dointvec_minmax,
1376        .extra1 = &sysctl_nr_open_min,
1377        .extra2 = &sysctl_nr_open_max,
1378    },
1379    {
1380        .procname = "dentry-state",
1381        .data = &dentry_stat,
1382        .maxlen = 6*sizeof(int),
1383        .mode = 0444,
1384        .proc_handler = proc_nr_dentry,
1385    },
1386    {
1387        .procname = "overflowuid",
1388        .data = &fs_overflowuid,
1389        .maxlen = sizeof(int),
1390        .mode = 0644,
1391        .proc_handler = proc_dointvec_minmax,
1392        .extra1 = &minolduid,
1393        .extra2 = &maxolduid,
1394    },
1395    {
1396        .procname = "overflowgid",
1397        .data = &fs_overflowgid,
1398        .maxlen = sizeof(int),
1399        .mode = 0644,
1400        .proc_handler = proc_dointvec_minmax,
1401        .extra1 = &minolduid,
1402        .extra2 = &maxolduid,
1403    },
1404#ifdef CONFIG_FILE_LOCKING
1405    {
1406        .procname = "leases-enable",
1407        .data = &leases_enable,
1408        .maxlen = sizeof(int),
1409        .mode = 0644,
1410        .proc_handler = proc_dointvec,
1411    },
1412#endif
1413#ifdef CONFIG_DNOTIFY
1414    {
1415        .procname = "dir-notify-enable",
1416        .data = &dir_notify_enable,
1417        .maxlen = sizeof(int),
1418        .mode = 0644,
1419        .proc_handler = proc_dointvec,
1420    },
1421#endif
1422#ifdef CONFIG_MMU
1423#ifdef CONFIG_FILE_LOCKING
1424    {
1425        .procname = "lease-break-time",
1426        .data = &lease_break_time,
1427        .maxlen = sizeof(int),
1428        .mode = 0644,
1429        .proc_handler = proc_dointvec,
1430    },
1431#endif
1432#ifdef CONFIG_AIO
1433    {
1434        .procname = "aio-nr",
1435        .data = &aio_nr,
1436        .maxlen = sizeof(aio_nr),
1437        .mode = 0444,
1438        .proc_handler = proc_doulongvec_minmax,
1439    },
1440    {
1441        .procname = "aio-max-nr",
1442        .data = &aio_max_nr,
1443        .maxlen = sizeof(aio_max_nr),
1444        .mode = 0644,
1445        .proc_handler = proc_doulongvec_minmax,
1446    },
1447#endif /* CONFIG_AIO */
1448#ifdef CONFIG_INOTIFY_USER
1449    {
1450        .procname = "inotify",
1451        .mode = 0555,
1452        .child = inotify_table,
1453    },
1454#endif
1455#ifdef CONFIG_EPOLL
1456    {
1457        .procname = "epoll",
1458        .mode = 0555,
1459        .child = epoll_table,
1460    },
1461#endif
1462#endif
1463    {
1464        .procname = "suid_dumpable",
1465        .data = &suid_dumpable,
1466        .maxlen = sizeof(int),
1467        .mode = 0644,
1468        .proc_handler = proc_dointvec_minmax,
1469        .extra1 = &zero,
1470        .extra2 = &two,
1471    },
1472#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1473    {
1474        .procname = "binfmt_misc",
1475        .mode = 0555,
1476        .child = binfmt_misc_table,
1477    },
1478#endif
1479    {
1480        .procname = "pipe-max-size",
1481        .data = &pipe_max_size,
1482        .maxlen = sizeof(int),
1483        .mode = 0644,
1484        .proc_handler = &pipe_proc_fn,
1485        .extra1 = &pipe_min_size,
1486    },
1487    { }
1488};
1489
1490static struct ctl_table debug_table[] = {
1491#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
1492    defined(CONFIG_S390)
1493    {
1494        .procname = "exception-trace",
1495        .data = &show_unhandled_signals,
1496        .maxlen = sizeof(int),
1497        .mode = 0644,
1498        .proc_handler = proc_dointvec
1499    },
1500#endif
1501#if defined(CONFIG_OPTPROBES)
1502    {
1503        .procname = "kprobes-optimization",
1504        .data = &sysctl_kprobes_optimization,
1505        .maxlen = sizeof(int),
1506        .mode = 0644,
1507        .proc_handler = proc_kprobes_optimization_handler,
1508        .extra1 = &zero,
1509        .extra2 = &one,
1510    },
1511#endif
1512    { }
1513};
1514
/* dev_table has no static entries; it holds only the empty terminating entry. */
static struct ctl_table dev_table[] = {
    { }
};
1518
/*
 * Spinlock taken by the header helpers below around updates of the
 * used/count/unregistering fields of struct ctl_table_header and around
 * changes to the header and root lists.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1520
1521/* called under sysctl_lock */
1522static int use_table(struct ctl_table_header *p)
1523{
1524    if (unlikely(p->unregistering))
1525        return 0;
1526    p->used++;
1527    return 1;
1528}
1529
1530/* called under sysctl_lock */
1531static void unuse_table(struct ctl_table_header *p)
1532{
1533    if (!--p->used)
1534        if (unlikely(p->unregistering))
1535            complete(p->unregistering);
1536}
1537
1538/* called under sysctl_lock, will reacquire if has to wait */
1539static void start_unregistering(struct ctl_table_header *p)
1540{
1541    /*
1542     * if p->used is 0, nobody will ever touch that entry again;
1543     * we'll eliminate all paths to it before dropping sysctl_lock
1544     */
1545    if (unlikely(p->used)) {
1546        struct completion wait;
1547        init_completion(&wait);
1548        p->unregistering = &wait;
1549        spin_unlock(&sysctl_lock);
1550        wait_for_completion(&wait);
1551        spin_lock(&sysctl_lock);
1552    } else {
1553        /* anything non-NULL; we'll never dereference it */
1554        p->unregistering = ERR_PTR(-EINVAL);
1555    }
1556    /*
1557     * do not remove from the list until nobody holds it; walking the
1558     * list in do_sysctl() relies on that.
1559     */
1560    list_del_init(&p->ctl_entry);
1561}
1562
1563void sysctl_head_get(struct ctl_table_header *head)
1564{
1565    spin_lock(&sysctl_lock);
1566    head->count++;
1567    spin_unlock(&sysctl_lock);
1568}
1569
1570static void free_head(struct rcu_head *rcu)
1571{
1572    kfree(container_of(rcu, struct ctl_table_header, rcu));
1573}
1574
1575void sysctl_head_put(struct ctl_table_header *head)
1576{
1577    spin_lock(&sysctl_lock);
1578    if (!--head->count)
1579        call_rcu(&head->rcu, free_head);
1580    spin_unlock(&sysctl_lock);
1581}
1582
1583struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1584{
1585    if (!head)
1586        BUG();
1587    spin_lock(&sysctl_lock);
1588    if (!use_table(head))
1589        head = ERR_PTR(-ENOENT);
1590    spin_unlock(&sysctl_lock);
1591    return head;
1592}
1593
1594void sysctl_head_finish(struct ctl_table_header *head)
1595{
1596    if (!head)
1597        return;
1598    spin_lock(&sysctl_lock);
1599    unuse_table(head);
1600    spin_unlock(&sysctl_lock);
1601}
1602
1603static struct ctl_table_set *
1604lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1605{
1606    struct ctl_table_set *set = &root->default_set;
1607    if (root->lookup)
1608        set = root->lookup(root, namespaces);
1609    return set;
1610}
1611
1612static struct list_head *
1613lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1614{
1615    struct ctl_table_set *set = lookup_header_set(root, namespaces);
1616    return &set->list;
1617}
1618
1619struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1620                        struct ctl_table_header *prev)
1621{
1622    struct ctl_table_root *root;
1623    struct list_head *header_list;
1624    struct ctl_table_header *head;
1625    struct list_head *tmp;
1626
1627    spin_lock(&sysctl_lock);
1628    if (prev) {
1629        head = prev;
1630        tmp = &prev->ctl_entry;
1631        unuse_table(prev);
1632        goto next;
1633    }
1634    tmp = &root_table_header.ctl_entry;
1635    for (;;) {
1636        head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1637
1638        if (!use_table(head))
1639            goto next;
1640        spin_unlock(&sysctl_lock);
1641        return head;
1642    next:
1643        root = head->root;
1644        tmp = tmp->next;
1645        header_list = lookup_header_list(root, namespaces);
1646        if (tmp != header_list)
1647            continue;
1648
1649        do {
1650            root = list_entry(root->root_list.next,
1651                    struct ctl_table_root, root_list);
1652            if (root == &sysctl_table_root)
1653                goto out;
1654            header_list = lookup_header_list(root, namespaces);
1655        } while (list_empty(header_list));
1656        tmp = header_list->next;
1657    }
1658out:
1659    spin_unlock(&sysctl_lock);
1660    return NULL;
1661}
1662
1663struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1664{
1665    return __sysctl_head_next(current->nsproxy, prev);
1666}
1667
1668void register_sysctl_root(struct ctl_table_root *root)
1669{
1670    spin_lock(&sysctl_lock);
1671    list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1672    spin_unlock(&sysctl_lock);
1673}
1674
1675/*
1676 * sysctl_perm does NOT grant the superuser all rights automatically, because
1677 * some sysctl variables are readonly even to root.
1678 */
1679
1680static int test_perm(int mode, int op)
1681{
1682    if (!current_euid())
1683        mode >>= 6;
1684    else if (in_egroup_p(0))
1685        mode >>= 3;
1686    if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1687        return 0;
1688    return -EACCES;
1689}
1690
1691int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1692{
1693    int error;
1694    int mode;
1695
1696    error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1697    if (error)
1698        return error;
1699
1700    if (root->permissions)
1701        mode = root->permissions(root, current->nsproxy, table);
1702    else
1703        mode = table->mode;
1704
1705    return test_perm(mode, op);
1706}
1707
1708static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1709{
1710    for (; table->procname; table++) {
1711        table->parent = parent;
1712        if (table->child)
1713            sysctl_set_parent(table, table->child);
1714    }
1715}
1716
1717static __init int sysctl_init(void)
1718{
1719    sysctl_set_parent(NULL, root_table);
1720#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1721    sysctl_check_table(current->nsproxy, root_table);
1722#endif
1723    return 0;
1724}
1725
1726core_initcall(sysctl_init);
1727
1728static struct ctl_table *is_branch_in(struct ctl_table *branch,
1729                      struct ctl_table *table)
1730{
1731    struct ctl_table *p;
1732    const char *s = branch->procname;
1733
1734    /* branch should have named subdirectory as its first element */
1735    if (!s || !branch->child)
1736        return NULL;
1737
1738    /* ... and nothing else */
1739    if (branch[1].procname)
1740        return NULL;
1741
1742    /* table should contain subdirectory with the same name */
1743    for (p = table; p->procname; p++) {
1744        if (!p->child)
1745            continue;
1746        if (p->procname && strcmp(p->procname, s) == 0)
1747            return p;
1748    }
1749    return NULL;
1750}
1751
1752/* see if attaching q to p would be an improvement */
1753static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1754{
1755    struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1756    struct ctl_table *next;
1757    int is_better = 0;
1758    int not_in_parent = !p->attached_by;
1759
1760    while ((next = is_branch_in(by, to)) != NULL) {
1761        if (by == q->attached_by)
1762            is_better = 1;
1763        if (to == p->attached_by)
1764            not_in_parent = 1;
1765        by = by->child;
1766        to = next->child;
1767    }
1768
1769    if (is_better && not_in_parent) {
1770        q->attached_by = by;
1771        q->attached_to = to;
1772        q->parent = p;
1773    }
1774}
1775
1776/**
1777 * __register_sysctl_paths - register a sysctl hierarchy
1778 * @root: List of sysctl headers to register on
1779 * @namespaces: Data to compute which lists of sysctl entries are visible
1780 * @path: The path to the directory the sysctl table is in.
1781 * @table: the top-level table structure
1782 *
1783 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1784 * array. A completely 0 filled entry terminates the table.
1785 *
1786 * The members of the &struct ctl_table structure are used as follows:
1787 *
1788 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1789 * enter a sysctl file
1790 *
1791 * data - a pointer to data for use by proc_handler
1792 *
1793 * maxlen - the maximum size in bytes of the data
1794 *
1795 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1796 *
1797 * child - a pointer to the child sysctl table if this entry is a directory, or
1798 * %NULL.
1799 *
1800 * proc_handler - the text handler routine (described below)
1801 *
1802 * de - for internal use by the sysctl routines
1803 *
1804 * extra1, extra2 - extra pointers usable by the proc handler routines
1805 *
1806 * Leaf nodes in the sysctl tree will be represented by a single file
1807 * under /proc; non-leaf nodes will be represented by directories.
1808 *
1809 * sysctl(2) can automatically manage read and write requests through
1810 * the sysctl table. The data and maxlen fields of the ctl_table
1811 * struct enable minimal validation of the values being written to be
1812 * performed, and the mode field allows minimal authentication.
1813 *
1814 * There must be a proc_handler routine for any terminal nodes
1815 * mirrored under /proc/sys (non-terminals are handled by a built-in
1816 * directory handler). Several default handlers are available to
1817 * cover common cases -
1818 *
1819 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1820 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1821 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1822 *
1823 * It is the handler's job to read the input buffer from user memory
1824 * and process it. The handler should return 0 on success.
1825 *
1826 * This routine returns %NULL on a failure to register, and a pointer
1827 * to the table header on success.
1828 */
1829struct ctl_table_header *__register_sysctl_paths(
1830    struct ctl_table_root *root,
1831    struct nsproxy *namespaces,
1832    const struct ctl_path *path, struct ctl_table *table)
1833{
1834    struct ctl_table_header *header;
1835    struct ctl_table *new, **prevp;
1836    unsigned int n, npath;
1837    struct ctl_table_set *set;
1838
1839    /* Count the path components */
1840    for (npath = 0; path[npath].procname; ++npath)
1841        ;
1842
1843    /*
1844     * For each path component, allocate a 2-element ctl_table array.
1845     * The first array element will be filled with the sysctl entry
1846     * for this, the second will be the sentinel (procname == 0).
1847     *
1848     * We allocate everything in one go so that we don't have to
1849     * worry about freeing additional memory in unregister_sysctl_table.
1850     */
1851    header = kzalloc(sizeof(struct ctl_table_header) +
1852             (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1853    if (!header)
1854        return NULL;
1855
1856    new = (struct ctl_table *) (header + 1);
1857
1858    /* Now connect the dots */
1859    prevp = &header->ctl_table;
1860    for (n = 0; n < npath; ++n, ++path) {
1861        /* Copy the procname */
1862        new->procname = path->procname;
1863        new->mode = 0555;
1864
1865        *prevp = new;
1866        prevp = &new->child;
1867
1868        new += 2;
1869    }
1870    *prevp = table;
1871    header->ctl_table_arg = table;
1872
1873    INIT_LIST_HEAD(&header->ctl_entry);
1874    header->used = 0;
1875    header->unregistering = NULL;
1876    header->root = root;
1877    sysctl_set_parent(NULL, header->ctl_table);
1878    header->count = 1;
1879#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1880    if (sysctl_check_table(namespaces, header->ctl_table)) {
1881        kfree(header);
1882        return NULL;
1883    }
1884#endif
1885    spin_lock(&sysctl_lock);
1886    header->set = lookup_header_set(root, namespaces);
1887    header->attached_by = header->ctl_table;
1888    header->attached_to = root_table;
1889    header->parent = &root_table_header;
1890    for (set = header->set; set; set = set->parent) {
1891        struct ctl_table_header *p;
1892        list_for_each_entry(p, &set->list, ctl_entry) {
1893            if (p->unregistering)
1894                continue;
1895            try_attach(p, header);
1896        }
1897    }
1898    header->parent->count++;
1899    list_add_tail(&header->ctl_entry, &header->set->list);
1900    spin_unlock(&sysctl_lock);
1901
1902    return header;
1903}
1904
1905/**
1906 * register_sysctl_table_path - register a sysctl table hierarchy
1907 * @path: The path to the directory the sysctl table is in.
1908 * @table: the top-level table structure
1909 *
1910 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1911 * array. A completely 0 filled entry terminates the table.
1912 *
1913 * See __register_sysctl_paths for more details.
1914 */
1915struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1916                        struct ctl_table *table)
1917{
1918    return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1919                    path, table);
1920}
1921
1922/**
1923 * register_sysctl_table - register a sysctl table hierarchy
1924 * @table: the top-level table structure
1925 *
1926 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1927 * array. A completely 0 filled entry terminates the table.
1928 *
1929 * See register_sysctl_paths for more details.
1930 */
1931struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1932{
1933    static const struct ctl_path null_path[] = { {} };
1934
1935    return register_sysctl_paths(null_path, table);
1936}
1937
1938/**
1939 * unregister_sysctl_table - unregister a sysctl table hierarchy
1940 * @header: the header returned from register_sysctl_table
1941 *
1942 * Unregisters the sysctl table and all children. proc entries may not
1943 * actually be removed until they are no longer used by anyone.
1944 */
1945void unregister_sysctl_table(struct ctl_table_header * header)
1946{
1947    might_sleep();
1948
1949    if (header == NULL)
1950        return;
1951
1952    spin_lock(&sysctl_lock);
1953    start_unregistering(header);
1954    if (!--header->parent->count) {
1955        WARN_ON(1);
1956        call_rcu(&header->parent->rcu, free_head);
1957    }
1958    if (!--header->count)
1959        call_rcu(&header->rcu, free_head);
1960    spin_unlock(&sysctl_lock);
1961}
1962
1963int sysctl_is_seen(struct ctl_table_header *p)
1964{
1965    struct ctl_table_set *set = p->set;
1966    int res;
1967    spin_lock(&sysctl_lock);
1968    if (p->unregistering)
1969        res = 0;
1970    else if (!set->is_seen)
1971        res = 1;
1972    else
1973        res = set->is_seen(set);
1974    spin_unlock(&sysctl_lock);
1975    return res;
1976}
1977
1978void setup_sysctl_set(struct ctl_table_set *p,
1979    struct ctl_table_set *parent,
1980    int (*is_seen)(struct ctl_table_set *))
1981{
1982    INIT_LIST_HEAD(&p->list);
1983    p->parent = parent ? parent : &sysctl_table_root.default_set;
1984    p->is_seen = is_seen;
1985}
1986
1987#else /* !CONFIG_SYSCTL */
/*
 * Stub for kernels built without CONFIG_SYSCTL: nothing is registered and
 * NULL is returned.
 */
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
{
    return NULL;
}
1992
/*
 * Stub for kernels built without CONFIG_SYSCTL: nothing is registered and
 * NULL is returned.
 */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
                        struct ctl_table *table)
{
    return NULL;
}
1998
/* Stub for kernels built without CONFIG_SYSCTL: there is nothing to undo. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
2002
/* Stub for kernels built without CONFIG_SYSCTL: the set is left untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
    struct ctl_table_set *parent,
    int (*is_seen)(struct ctl_table_set *))
{
}
2008
/* Stub for kernels built without CONFIG_SYSCTL: no reference to drop. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
2012
2013#endif /* CONFIG_SYSCTL */
2014
2015/*
2016 * /proc/sys support
2017 */
2018
2019#ifdef CONFIG_PROC_SYSCTL
2020
2021static int _proc_do_string(void* data, int maxlen, int write,
2022               void __user *buffer,
2023               size_t *lenp, loff_t *ppos)
2024{
2025    size_t len;
2026    char __user *p;
2027    char c;
2028
2029    if (!data || !maxlen || !*lenp) {
2030        *lenp = 0;
2031        return 0;
2032    }
2033
2034    if (write) {
2035        len = 0;
2036        p = buffer;
2037        while (len < *lenp) {
2038            if (get_user(c, p++))
2039                return -EFAULT;
2040            if (c == 0 || c == '\n')
2041                break;
2042            len++;
2043        }
2044        if (len >= maxlen)
2045            len = maxlen-1;
2046        if(copy_from_user(data, buffer, len))
2047            return -EFAULT;
2048        ((char *) data)[len] = 0;
2049        *ppos += *lenp;
2050    } else {
2051        len = strlen(data);
2052        if (len > maxlen)
2053            len = maxlen;
2054
2055        if (*ppos > len) {
2056            *lenp = 0;
2057            return 0;
2058        }
2059
2060        data += *ppos;
2061        len -= *ppos;
2062
2063        if (len > *lenp)
2064            len = *lenp;
2065        if (len)
2066            if(copy_to_user(buffer, data, len))
2067                return -EFAULT;
2068        if (len < *lenp) {
2069            if(put_user('\n', ((char __user *) buffer) + len))
2070                return -EFAULT;
2071            len++;
2072        }
2073        *lenp = len;
2074        *ppos += len;
2075    }
2076    return 0;
2077}
2078
2079/**
2080 * proc_dostring - read a string sysctl
2081 * @table: the sysctl table
2082 * @write: %TRUE if this is a write to the sysctl file
2083 * @buffer: the user buffer
2084 * @lenp: the size of the user buffer
2085 * @ppos: file position
2086 *
2087 * Reads/writes a string from/to the user buffer. If the kernel
2088 * buffer provided is not large enough to hold the string, the
2089 * string is truncated. The copied string is %NULL-terminated.
2090 * If the string is being read by the user process, it is copied
2091 * and a newline '\n' is added. It is truncated if the buffer is
2092 * not large enough.
2093 *
2094 * Returns 0 on success.
2095 */
2096int proc_dostring(struct ctl_table *table, int write,
2097          void __user *buffer, size_t *lenp, loff_t *ppos)
2098{
2099    return _proc_do_string(table->data, table->maxlen, write,
2100                   buffer, lenp, ppos);
2101}
2102
/*
 * Advance *@buf to the position returned by skip_spaces() and return the
 * number of bytes that were skipped.
 */
static size_t proc_skip_spaces(char **buf)
{
    char *start = *buf;

    *buf = skip_spaces(start);
    return *buf - start;
}
2111
/*
 * Skip a leading run of the character @v: while *@size is non-zero and the
 * byte at *@buf equals @v, advance *@buf by one and decrement *@size.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
    while (*size && **buf == v) {
        (*size)--;
        (*buf)++;
    }
}
2121
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are examined. A leading '-' counts
 * as a sign only when more than one byte is available. The digits are
 * converted by simple_strtoul() with base 0 and the magnitude is stored in
 * @val.
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * Returns -EINVAL if the buffer is empty, does not start with a number,
 * the number fills the whole temporary buffer, or the trailing character
 * is not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
              unsigned long *val, bool *neg,
              const char *perm_tr, unsigned perm_tr_len, char *tr)
{
    char tmp[TMPBUFLEN];
    char *p = tmp;
    int len;

    if (!*size)
        return -EINVAL;

    /* work on a bounded, NUL-terminated copy of the input */
    len = *size;
    if (len > TMPBUFLEN - 1)
        len = TMPBUFLEN - 1;
    memcpy(tmp, *buf, len);
    tmp[len] = 0;

    *neg = (*p == '-' && *size > 1);
    if (*neg)
        p++;

    if (!isdigit(*p))
        return -EINVAL;

    *val = simple_strtoul(p, &p, 0);
    len = p - tmp;

    /*
     * If the number reaches the end of the copy we cannot tell whether
     * the next char is whitespace, so we might accept invalid integers
     * (e.g. 1234...a) or two integers instead of one (e.g. 123...1).
     * Reject numbers that large.
     */
    if (len == TMPBUFLEN - 1)
        return -EINVAL;

    if (len < *size) {
        /* a trailing character exists: validate it, then report it */
        if (perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
            return -EINVAL;
        if (tr)
            *tr = *p;
    }

    *buf += len;
    *size -= len;

    return 0;
}
2186
2187/**
2188 * proc_put_long - converts an integer to a decimal ASCII formatted string
2189 *
2190 * @buf: the user buffer
2191 * @size: the size of the user buffer
2192 * @val: the integer to be converted
2193 * @neg: sign of the number, %TRUE for negative
2194 *
2195 * In case of success %0 is returned and @buf and @size are updated with
2196 * the amount of bytes written.
2197 */
2198static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2199              bool neg)
2200{
2201    int len;
2202    char tmp[TMPBUFLEN], *p = tmp;
2203
2204    sprintf(p, "%s%lu", neg ? "-" : "", val);
2205    len = strlen(tmp);
2206    if (len > *size)
2207        len = *size;
2208    if (copy_to_user(*buf, tmp, len))
2209        return -EFAULT;
2210    *size -= len;
2211    *buf += len;
2212    return 0;
2213}
2214#undef TMPBUFLEN
2215
2216static int proc_put_char(void __user **buf, size_t *size, char c)
2217{
2218    if (*size) {
2219        char __user **buffer = (char __user **)buf;
2220        if (put_user(c, *buffer))
2221            return -EFAULT;
2222        (*size)--, (*buffer)++;
2223        *buf = *buffer;
2224    }
2225    return 0;
2226}
2227
/*
 * Default conversion callback for __do_proc_dointvec().
 *
 * On write (@write non-zero) the sign/magnitude pair (*negp, *lvalp) is
 * converted to an int and stored in *valp. Magnitudes that cannot be
 * represented as an int are rejected with -EINVAL instead of being
 * silently truncated.
 *
 * On read the int at *valp is split into sign (*negp) and magnitude
 * (*lvalp).
 *
 * @data is unused. Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* The most negative int has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/*
			 * Negate in unsigned arithmetic: -val would overflow
			 * for INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2246
2247static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2248
2249static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2250          int write, void __user *buffer,
2251          size_t *lenp, loff_t *ppos,
2252          int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2253                  int write, void *data),
2254          void *data)
2255{
2256    int *i, vleft, first = 1, err = 0;
2257    unsigned long page = 0;
2258    size_t left;
2259    char *kbuf;
2260    
2261    if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2262        *lenp = 0;
2263        return 0;
2264    }
2265    
2266    i = (int *) tbl_data;
2267    vleft = table->maxlen / sizeof(*i);
2268    left = *lenp;
2269
2270    if (!conv)
2271        conv = do_proc_dointvec_conv;
2272
2273    if (write) {
2274        if (left > PAGE_SIZE - 1)
2275            left = PAGE_SIZE - 1;
2276        page = __get_free_page(GFP_TEMPORARY);
2277        kbuf = (char *) page;
2278        if (!kbuf)
2279            return -ENOMEM;
2280        if (copy_from_user(kbuf, buffer, left)) {
2281            err = -EFAULT;
2282            goto free;
2283        }
2284        kbuf[left] = 0;
2285    }
2286
2287    for (; left && vleft--; i++, first=0) {
2288        unsigned long lval;
2289        bool neg;
2290
2291        if (write) {
2292            left -= proc_skip_spaces(&kbuf);
2293
2294            if (!left)
2295                break;
2296            err = proc_get_long(&kbuf, &left, &lval, &neg,
2297                         proc_wspace_sep,
2298                         sizeof(proc_wspace_sep), NULL);
2299            if (err)
2300                break;
2301            if (conv(&neg, &lval, i, 1, data)) {
2302                err = -EINVAL;
2303                break;
2304            }
2305        } else {
2306            if (conv(&neg, &lval, i, 0, data)) {
2307                err = -EINVAL;
2308                break;
2309            }
2310            if (!first)
2311                err = proc_put_char(&buffer, &left, '\t');
2312            if (err)
2313                break;
2314            err = proc_put_long(&buffer, &left, lval, neg);
2315            if (err)
2316                break;
2317        }
2318    }
2319
2320    if (!write && !first && left && !err)
2321        err = proc_put_char(&buffer, &left, '\n');
2322    if (write && !err && left)
2323        left -= proc_skip_spaces(&kbuf);
2324free:
2325    if (write) {
2326        free_page(page);
2327        if (first)
2328            return err ? : -EINVAL;
2329    }
2330    *lenp -= left;
2331    *ppos += *lenp;
2332    return err;
2333}
2334
2335static int do_proc_dointvec(struct ctl_table *table, int write,
2336          void __user *buffer, size_t *lenp, loff_t *ppos,
2337          int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2338                  int write, void *data),
2339          void *data)
2340{
2341    return __do_proc_dointvec(table->data, table, write,
2342            buffer, lenp, ppos, conv, data);
2343}
2344
2345/**
2346 * proc_dointvec - read a vector of integers
2347 * @table: the sysctl table
2348 * @write: %TRUE if this is a write to the sysctl file
2349 * @buffer: the user buffer
2350 * @lenp: the size of the user buffer
2351 * @ppos: file position
2352 *
2353 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2354 * values from/to the user buffer, treated as an ASCII string.
2355 *
2356 * Returns 0 on success.
2357 */
2358int proc_dointvec(struct ctl_table *table, int write,
2359             void __user *buffer, size_t *lenp, loff_t *ppos)
2360{
2361    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2362                    NULL,NULL);
2363}
2364
2365/*
2366 * Taint values can only be increased
2367 * This means we can safely use a temporary.
2368 */
2369static int proc_taint(struct ctl_table *table, int write,
2370                   void __user *buffer, size_t *lenp, loff_t *ppos)
2371{
2372    struct ctl_table t;
2373    unsigned long tmptaint = get_taint();
2374    int err;
2375
2376    if (write && !capable(CAP_SYS_ADMIN))
2377        return -EPERM;
2378
2379    t = *table;
2380    t.data = &tmptaint;
2381    err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2382    if (err < 0)
2383        return err;
2384
2385    if (write) {
2386        /*
2387         * Poor man's atomic or. Not worth adding a primitive
2388         * to everyone's atomic.h for this
2389         */
2390        int i;
2391        for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2392            if ((tmptaint >> i) & 1)
2393                add_taint(i);
2394        }
2395    }
2396
2397    return err;
2398}
2399
/*
 * Optional bounds for do_proc_dointvec_minmax_conv(); a NULL pointer
 * means that side is unbounded.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback that range-checks values on write.
 *
 * On write the sign/magnitude pair (*negp, *lvalp) is first checked to
 * be representable as an int, so that an oversized input cannot wrap
 * around into the permitted range, and is then checked against the
 * bounds in @data (a struct do_proc_dointvec_minmax_conv_param).
 * -EINVAL is returned, and *valp left untouched, if either check fails.
 *
 * On read the int at *valp is split into sign (*negp) and magnitude
 * (*lvalp).
 *
 * Returns 0 on success.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		if (*negp) {
			/* The most negative int has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}

		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/*
			 * Negate in unsigned arithmetic: -val would overflow
			 * for INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2428
2429/**
2430 * proc_dointvec_minmax - read a vector of integers with min/max values
2431 * @table: the sysctl table
2432 * @write: %TRUE if this is a write to the sysctl file
2433 * @buffer: the user buffer
2434 * @lenp: the size of the user buffer
2435 * @ppos: file position
2436 *
2437 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2438 * values from/to the user buffer, treated as an ASCII string.
2439 *
2440 * This routine will ensure the values are within the range specified by
2441 * table->extra1 (min) and table->extra2 (max).
2442 *
2443 * Returns 0 on success.
2444 */
2445int proc_dointvec_minmax(struct ctl_table *table, int write,
2446          void __user *buffer, size_t *lenp, loff_t *ppos)
2447{
2448    struct do_proc_dointvec_minmax_conv_param param = {
2449        .min = (int *) table->extra1,
2450        .max = (int *) table->extra2,
2451    };
2452    return do_proc_dointvec(table, write, buffer, lenp, ppos,
2453                do_proc_dointvec_minmax_conv, &param);
2454}
2455
2456static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2457                     void __user *buffer,
2458                     size_t *lenp, loff_t *ppos,
2459                     unsigned long convmul,
2460                     unsigned long convdiv)
2461{
2462    unsigned long *i, *min, *max;
2463    int vleft, first = 1, err = 0;
2464    unsigned long page = 0;
2465    size_t left;
2466    char *kbuf;
2467
2468    if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2469        *lenp = 0;
2470        return 0;
2471    }
2472
2473    i = (unsigned long *) data;
2474    min = (unsigned long *) table->extra1;
2475    max = (unsigned long *) table->extra2;
2476    vleft = table->maxlen / sizeof(unsigned long);
2477    left = *lenp;
2478
2479    if (write) {
2480        if (left > PAGE_SIZE - 1)
2481            left = PAGE_SIZE - 1;
2482        page = __get_free_page(GFP_TEMPORARY);
2483        kbuf = (char *) page;
2484        if (!kbuf)
2485            return -ENOMEM;
2486        if (copy_from_user(kbuf, buffer, left)) {
2487            err = -EFAULT;
2488            goto free;
2489        }
2490        kbuf[left] = 0;
2491    }
2492
2493    for (; left && vleft--; i++, first = 0) {
2494        unsigned long val;
2495
2496        if (write) {
2497            bool neg;
2498
2499            left -= proc_skip_spaces(&kbuf);
2500
2501            err = proc_get_long(&kbuf, &left, &val, &neg,
2502                         proc_wspace_sep,
2503                         sizeof(proc_wspace_sep), NULL);
2504            if (err)
2505                break;
2506            if (neg)
2507                continue;
2508            if ((min && val < *min) || (max && val > *max))
2509                continue;
2510            *i = val;
2511        } else {
2512            val = convdiv * (*i) / convmul;
2513            if (!first)
2514                err = proc_put_char(&buffer, &left, '\t');
2515            err = proc_put_long(&buffer, &left, val, false);
2516            if (err)
2517                break;
2518        }
2519    }
2520
2521    if (!write && !first && left && !err)
2522        err = proc_put_char(&buffer, &left, '\n');
2523    if (write && !err)
2524        left -= proc_skip_spaces(&kbuf);
2525free:
2526    if (write) {
2527        free_page(page);
2528        if (first)
2529            return err ? : -EINVAL;
2530    }
2531    *lenp -= left;
2532    *ppos += *lenp;
2533    return err;
2534}
2535
2536static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2537                     void __user *buffer,
2538                     size_t *lenp, loff_t *ppos,
2539                     unsigned long convmul,
2540                     unsigned long convdiv)
2541{
2542    return __do_proc_doulongvec_minmax(table->data, table, write,
2543            buffer, lenp, ppos, convmul, convdiv);
2544}
2545
2546/**
2547 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2548 * @table: the sysctl table
2549 * @write: %TRUE if this is a write to the sysctl file
2550 * @buffer: the user buffer
2551 * @lenp: the size of the user buffer
2552 * @ppos: file position
2553 *
2554 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2555 * values from/to the user buffer, treated as an ASCII string.
2556 *
2557 * This routine will ensure the values are within the range specified by
2558 * table->extra1 (min) and table->extra2 (max).
2559 *
2560 * Returns 0 on success.
2561 */
2562int proc_doulongvec_minmax(struct ctl_table *table, int write,
2563               void __user *buffer, size_t *lenp, loff_t *ppos)
2564{
2565    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2566}
2567
2568/**
2569 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2570 * @table: the sysctl table
2571 * @write: %TRUE if this is a write to the sysctl file
2572 * @buffer: the user buffer
2573 * @lenp: the size of the user buffer
2574 * @ppos: file position
2575 *
2576 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2577 * values from/to the user buffer, treated as an ASCII string. The values
2578 * are treated as milliseconds, and converted to jiffies when they are stored.
2579 *
2580 * This routine will ensure the values are within the range specified by
2581 * table->extra1 (min) and table->extra2 (max).
2582 *
2583 * Returns 0 on success.
2584 */
2585int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2586                      void __user *buffer,
2587                      size_t *lenp, loff_t *ppos)
2588{
2589    return do_proc_doulongvec_minmax(table, write, buffer,
2590                     lenp, ppos, HZ, 1000l);
2591}
2592
2593
2594static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2595                     int *valp,
2596                     int write, void *data)
2597{
2598    if (write) {
2599        if (*lvalp > LONG_MAX / HZ)
2600            return 1;
2601        *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2602    } else {
2603        int val = *valp;
2604        unsigned long lval;
2605        if (val < 0) {
2606            *negp = true;
2607            lval = (unsigned long)-val;
2608        } else {
2609            *negp = false;
2610            lval = (unsigned long)val;
2611        }
2612        *lvalp = lval / HZ;
2613    }
2614    return 0;
2615}
2616
2617static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2618                        int *valp,
2619                        int write, void *data)
2620{
2621    if (write) {
2622        if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2623            return 1;
2624        *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2625    } else {
2626        int val = *valp;
2627        unsigned long lval;
2628        if (val < 0) {
2629            *negp = true;
2630            lval = (unsigned long)-val;
2631        } else {
2632            *negp = false;
2633            lval = (unsigned long)val;
2634        }
2635        *lvalp = jiffies_to_clock_t(lval);
2636    }
2637    return 0;
2638}
2639
/*
 * Conversion callback: milliseconds on the user side, jiffies in the
 * kernel.
 *
 * On write the signed user value is passed through msecs_to_jiffies().
 * Because the result is stored in an int, anything above INT_MAX makes
 * the callback return 1 (failure) without touching *valp, instead of
 * storing a truncated value.
 *
 * On read the stored int is split into sign (*negp) and magnitude, and
 * the magnitude is passed through jiffies_to_msecs().
 *
 * @data is unused. Returns 0 on success.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = true;
			/*
			 * Negate in unsigned arithmetic: -val would overflow
			 * for INT_MIN.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = false;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2660
2661/**
2662 * proc_dointvec_jiffies - read a vector of integers as seconds
2663 * @table: the sysctl table
2664 * @write: %TRUE if this is a write to the sysctl file
2665 * @buffer: the user buffer
2666 * @lenp: the size of the user buffer
2667 * @ppos: file position
2668 *
2669 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2670 * values from/to the user buffer, treated as an ASCII string.
2671 * The values read are assumed to be in seconds, and are converted into
2672 * jiffies.
2673 *
2674 * Returns 0 on success.
2675 */
2676int proc_dointvec_jiffies(struct ctl_table *table, int write,
2677              void __user *buffer, size_t *lenp, loff_t *ppos)
2678{
2679    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2680                    do_proc_dointvec_jiffies_conv,NULL);
2681}
2682
2683/**
2684 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2685 * @table: the sysctl table
2686 * @write: %TRUE if this is a write to the sysctl file
2687 * @buffer: the user buffer
2688 * @lenp: the size of the user buffer
2689 * @ppos: pointer to the file position
2690 *
2691 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2692 * values from/to the user buffer, treated as an ASCII string.
2693 * The values read are assumed to be in 1/USER_HZ seconds, and
2694 * are converted into jiffies.
2695 *
2696 * Returns 0 on success.
2697 */
2698int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2699                 void __user *buffer, size_t *lenp, loff_t *ppos)
2700{
2701    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2702                    do_proc_dointvec_userhz_jiffies_conv,NULL);
2703}
2704
2705/**
2706 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2707 * @table: the sysctl table
2708 * @write: %TRUE if this is a write to the sysctl file
2709 * @buffer: the user buffer
2710 * @lenp: the size of the user buffer
2711 * @ppos: file position
2712 * @ppos: the current position in the file
2713 *
2714 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2715 * values from/to the user buffer, treated as an ASCII string.
2716 * The values read are assumed to be in 1/1000 seconds, and
2717 * are converted into jiffies.
2718 *
2719 * Returns 0 on success.
2720 */
2721int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2722                 void __user *buffer, size_t *lenp, loff_t *ppos)
2723{
2724    return do_proc_dointvec(table, write, buffer, lenp, ppos,
2725                do_proc_dointvec_ms_jiffies_conv, NULL);
2726}
2727
2728static int proc_do_cad_pid(struct ctl_table *table, int write,
2729               void __user *buffer, size_t *lenp, loff_t *ppos)
2730{
2731    struct pid *new_pid;
2732    pid_t tmp;
2733    int r;
2734
2735    tmp = pid_vnr(cad_pid);
2736
2737    r = __do_proc_dointvec(&tmp, table, write, buffer,
2738                   lenp, ppos, NULL, NULL);
2739    if (r || !write)
2740        return r;
2741
2742    new_pid = find_get_pid(tmp);
2743    if (!new_pid)
2744        return -ESRCH;
2745
2746    put_pid(xchg(&cad_pid, new_pid));
2747    return 0;
2748}
2749
2750/**
2751 * proc_do_large_bitmap - read/write from/to a large bitmap
2752 * @table: the sysctl table
2753 * @write: %TRUE if this is a write to the sysctl file
2754 * @buffer: the user buffer
2755 * @lenp: the size of the user buffer
2756 * @ppos: file position
2757 *
2758 * The bitmap is stored at table->data and the bitmap length (in bits)
2759 * in table->maxlen.
2760 *
2761 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2762 * large bitmaps may be represented in a compact manner. Writing into
2763 * the file will clear the bitmap then update it with the given input.
2764 *
2765 * Returns 0 on success.
2766 */
2767int proc_do_large_bitmap(struct ctl_table *table, int write,
2768             void __user *buffer, size_t *lenp, loff_t *ppos)
2769{
2770    int err = 0;
2771    bool first = 1;
2772    size_t left = *lenp;
2773    unsigned long bitmap_len = table->maxlen;
2774    unsigned long *bitmap = (unsigned long *) table->data;
2775    unsigned long *tmp_bitmap = NULL;
2776    char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2777
2778    if (!bitmap_len || !left || (*ppos && !write)) {
2779        *lenp = 0;
2780        return 0;
2781    }
2782
2783    if (write) {
2784        unsigned long page = 0;
2785        char *kbuf;
2786
2787        if (left > PAGE_SIZE - 1)
2788            left = PAGE_SIZE - 1;
2789
2790        page = __get_free_page(GFP_TEMPORARY);
2791        kbuf = (char *) page;
2792        if (!kbuf)
2793            return -ENOMEM;
2794        if (copy_from_user(kbuf, buffer, left)) {
2795            free_page(page);
2796            return -EFAULT;
2797                }
2798        kbuf[left] = 0;
2799
2800        tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2801                     GFP_KERNEL);
2802        if (!tmp_bitmap) {
2803            free_page(page);
2804            return -ENOMEM;
2805        }
2806        proc_skip_char(&kbuf, &left, '\n');
2807        while (!err && left) {
2808            unsigned long val_a, val_b;
2809            bool neg;
2810
2811            err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2812                         sizeof(tr_a), &c);
2813            if (err)
2814                break;
2815            if (val_a >= bitmap_len || neg) {
2816                err = -EINVAL;
2817                break;
2818            }
2819
2820            val_b = val_a;
2821            if (left) {
2822                kbuf++;
2823                left--;
2824            }
2825
2826            if (c == '-') {
2827                err = proc_get_long(&kbuf, &left, &val_b,
2828                             &neg, tr_b, sizeof(tr_b),
2829                             &c);
2830                if (err)
2831                    break;
2832                if (val_b >= bitmap_len || neg ||
2833                    val_a > val_b) {
2834                    err = -EINVAL;
2835                    break;
2836                }
2837                if (left) {
2838                    kbuf++;
2839                    left--;
2840                }
2841            }
2842
2843            while (val_a <= val_b)
2844                set_bit(val_a++, tmp_bitmap);
2845
2846            first = 0;
2847            proc_skip_char(&kbuf, &left, '\n');
2848        }
2849        free_page(page);
2850    } else {
2851        unsigned long bit_a, bit_b = 0;
2852
2853        while (left) {
2854            bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2855            if (bit_a >= bitmap_len)
2856                break;
2857            bit_b = find_next_zero_bit(bitmap, bitmap_len,
2858                           bit_a + 1) - 1;
2859
2860            if (!first) {
2861                err = proc_put_char(&buffer, &left, ',');
2862                if (err)
2863                    break;
2864            }
2865            err = proc_put_long(&buffer, &left, bit_a, false);
2866            if (err)
2867                break;
2868            if (bit_a != bit_b) {
2869                err = proc_put_char(&buffer, &left, '-');
2870                if (err)
2871                    break;
2872                err = proc_put_long(&buffer, &left, bit_b, false);
2873                if (err)
2874                    break;
2875            }
2876
2877            first = 0; bit_b++;
2878        }
2879        if (!err)
2880            err = proc_put_char(&buffer, &left, '\n');
2881    }
2882
2883    if (!err) {
2884        if (write) {
2885            if (*ppos)
2886                bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2887            else
2888                memcpy(bitmap, tmp_bitmap,
2889                    BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2890        }
2891        kfree(tmp_bitmap);
2892        *lenp -= left;
2893        *ppos += *lenp;
2894        return 0;
2895    } else {
2896        kfree(tmp_bitmap);
2897        return err;
2898    }
2899}
2900
2901#else /* CONFIG_PROC_SYSCTL */
2902
/*
 * Stub handlers for the #else branch of CONFIG_PROC_SYSCTL: each one
 * keeps the signature of its counterpart in the other branch and
 * returns -ENOSYS without touching any of its arguments.
 */

/* Stub: string handler. */
int proc_dostring(struct ctl_table *table, int write,
          void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}

/* Stub: int vector handler. */
int proc_dointvec(struct ctl_table *table, int write,
          void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}

/* Stub: int vector handler with min/max checking. */
int proc_dointvec_minmax(struct ctl_table *table, int write,
            void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}

/* Stub: int vector handler, seconds <-> jiffies. */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
            void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}

/* Stub: int vector handler, 1/USER_HZ seconds <-> jiffies. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
            void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}

/* Stub: int vector handler, milliseconds <-> jiffies. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
                 void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}

/* Stub: unsigned long vector handler with min/max checking. */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
            void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}

/* Stub: unsigned long vector handler, milliseconds <-> jiffies. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
                      void __user *buffer,
                      size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}
2951
2952
2953#endif /* CONFIG_PROC_SYSCTL */
2954
/*
 * The exports are collected here instead of following each definition:
 * the proc_* handlers are defined twice (once in each branch of the
 * CONFIG_PROC_SYSCTL conditional), so per-definition exports would have
 * to be written twice as well. Exception to the usual placement
 * granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(unregister_sysctl_table);
2970

Archive Download this file



interactive