Root/kernel/sys.c

1/*
2 * linux/kernel/sys.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7#include <linux/module.h>
8#include <linux/mm.h>
9#include <linux/utsname.h>
10#include <linux/mman.h>
11#include <linux/smp_lock.h>
12#include <linux/notifier.h>
13#include <linux/reboot.h>
14#include <linux/prctl.h>
15#include <linux/highuid.h>
16#include <linux/fs.h>
17#include <linux/perf_event.h>
18#include <linux/resource.h>
19#include <linux/kernel.h>
20#include <linux/kexec.h>
21#include <linux/workqueue.h>
22#include <linux/capability.h>
23#include <linux/device.h>
24#include <linux/key.h>
25#include <linux/times.h>
26#include <linux/posix-timers.h>
27#include <linux/security.h>
28#include <linux/dcookies.h>
29#include <linux/suspend.h>
30#include <linux/tty.h>
31#include <linux/signal.h>
32#include <linux/cn_proc.h>
33#include <linux/getcpu.h>
34#include <linux/task_io_accounting_ops.h>
35#include <linux/seccomp.h>
36#include <linux/cpu.h>
37#include <linux/ptrace.h>
38#include <linux/fs_struct.h>
39
40#include <linux/compat.h>
41#include <linux/syscalls.h>
42#include <linux/kprobes.h>
43#include <linux/user_namespace.h>
44
45#include <asm/uaccess.h>
46#include <asm/io.h>
47#include <asm/unistd.h>
48
/*
 * Fallbacks for the architecture-specific control macros: any macro that
 * no earlier header has defined evaluates to -EINVAL.
 */
#if !defined(SET_UNALIGN_CTL)
# define SET_UNALIGN_CTL(a,b) (-EINVAL)
#endif
#if !defined(GET_UNALIGN_CTL)
# define GET_UNALIGN_CTL(a,b) (-EINVAL)
#endif
#if !defined(SET_FPEMU_CTL)
# define SET_FPEMU_CTL(a,b) (-EINVAL)
#endif
#if !defined(GET_FPEMU_CTL)
# define GET_FPEMU_CTL(a,b) (-EINVAL)
#endif
#if !defined(SET_FPEXC_CTL)
# define SET_FPEXC_CTL(a,b) (-EINVAL)
#endif
#if !defined(GET_FPEXC_CTL)
# define GET_FPEXC_CTL(a,b) (-EINVAL)
#endif
#if !defined(GET_ENDIAN)
# define GET_ENDIAN(a,b) (-EINVAL)
#endif
#if !defined(SET_ENDIAN)
# define SET_ENDIAN(a,b) (-EINVAL)
#endif
#if !defined(GET_TSC_CTL)
# define GET_TSC_CTL(a) (-EINVAL)
#endif
#if !defined(SET_TSC_CTL)
# define SET_TSC_CTL(a) (-EINVAL)
#endif
79
80/*
81 * this is where the system-wide overflow UID and GID are defined, for
82 * architectures that now have 32-bit UID/GID but didn't in the past
83 */
84
85int overflowuid = DEFAULT_OVERFLOWUID;
86int overflowgid = DEFAULT_OVERFLOWGID;
87
88#ifdef CONFIG_UID16
89EXPORT_SYMBOL(overflowuid);
90EXPORT_SYMBOL(overflowgid);
91#endif
92
93/*
94 * the same as above, but for filesystems which can only store a 16-bit
95 * UID and GID. as such, this is needed on all architectures
96 */
97
98int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
99int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
100
101EXPORT_SYMBOL(fs_overflowuid);
102EXPORT_SYMBOL(fs_overflowgid);
103
/*
 * Whether ctrl-alt-del reboots the machine: non-zero means yes, which is
 * the default.
 */

int C_A_D = 1;
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * Optional hook: if set, this is used for preparing the system to
 * power off.
 */

void (*pm_power_off_prepare)(void);
117
118/*
119 * set the priority of a task
120 * - the caller must hold the RCU read lock
121 */
122static int set_one_prio(struct task_struct *p, int niceval, int error)
123{
124    const struct cred *cred = current_cred(), *pcred = __task_cred(p);
125    int no_nice;
126
127    if (pcred->uid != cred->euid &&
128        pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
129        error = -EPERM;
130        goto out;
131    }
132    if (niceval < task_nice(p) && !can_nice(p, niceval)) {
133        error = -EACCES;
134        goto out;
135    }
136    no_nice = security_task_setnice(p, niceval);
137    if (no_nice) {
138        error = no_nice;
139        goto out;
140    }
141    if (error == -ESRCH)
142        error = 0;
143    set_user_nice(p, niceval);
144out:
145    return error;
146}
147
148SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
149{
150    struct task_struct *g, *p;
151    struct user_struct *user;
152    const struct cred *cred = current_cred();
153    int error = -EINVAL;
154    struct pid *pgrp;
155
156    if (which > PRIO_USER || which < PRIO_PROCESS)
157        goto out;
158
159    /* normalize: avoid signed division (rounding problems) */
160    error = -ESRCH;
161    if (niceval < -20)
162        niceval = -20;
163    if (niceval > 19)
164        niceval = 19;
165
166    read_lock(&tasklist_lock);
167    switch (which) {
168        case PRIO_PROCESS:
169            if (who)
170                p = find_task_by_vpid(who);
171            else
172                p = current;
173            if (p)
174                error = set_one_prio(p, niceval, error);
175            break;
176        case PRIO_PGRP:
177            if (who)
178                pgrp = find_vpid(who);
179            else
180                pgrp = task_pgrp(current);
181            do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
182                error = set_one_prio(p, niceval, error);
183            } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
184            break;
185        case PRIO_USER:
186            user = (struct user_struct *) cred->user;
187            if (!who)
188                who = cred->uid;
189            else if ((who != cred->uid) &&
190                 !(user = find_user(who)))
191                goto out_unlock; /* No processes for this user */
192
193            do_each_thread(g, p)
194                if (__task_cred(p)->uid == who)
195                    error = set_one_prio(p, niceval, error);
196            while_each_thread(g, p);
197            if (who != cred->uid)
198                free_uid(user); /* For find_user() */
199            break;
200    }
201out_unlock:
202    read_unlock(&tasklist_lock);
203out:
204    return error;
205}
206
207/*
208 * Ugh. To avoid negative return values, "getpriority()" will
209 * not return the normal nice-value, but a negated value that
210 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
211 * to stay compatible.
212 */
213SYSCALL_DEFINE2(getpriority, int, which, int, who)
214{
215    struct task_struct *g, *p;
216    struct user_struct *user;
217    const struct cred *cred = current_cred();
218    long niceval, retval = -ESRCH;
219    struct pid *pgrp;
220
221    if (which > PRIO_USER || which < PRIO_PROCESS)
222        return -EINVAL;
223
224    read_lock(&tasklist_lock);
225    switch (which) {
226        case PRIO_PROCESS:
227            if (who)
228                p = find_task_by_vpid(who);
229            else
230                p = current;
231            if (p) {
232                niceval = 20 - task_nice(p);
233                if (niceval > retval)
234                    retval = niceval;
235            }
236            break;
237        case PRIO_PGRP:
238            if (who)
239                pgrp = find_vpid(who);
240            else
241                pgrp = task_pgrp(current);
242            do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
243                niceval = 20 - task_nice(p);
244                if (niceval > retval)
245                    retval = niceval;
246            } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
247            break;
248        case PRIO_USER:
249            user = (struct user_struct *) cred->user;
250            if (!who)
251                who = cred->uid;
252            else if ((who != cred->uid) &&
253                 !(user = find_user(who)))
254                goto out_unlock; /* No processes for this user */
255
256            do_each_thread(g, p)
257                if (__task_cred(p)->uid == who) {
258                    niceval = 20 - task_nice(p);
259                    if (niceval > retval)
260                        retval = niceval;
261                }
262            while_each_thread(g, p);
263            if (who != cred->uid)
264                free_uid(user); /* for find_user() */
265            break;
266    }
267out_unlock:
268    read_unlock(&tasklist_lock);
269
270    return retval;
271}
272
/**
 * emergency_restart - reboot the system
 *
 * Best-effort reboot for when we know we are in trouble: no hardware
 * is shut down and no locks are taken. This is safe to call in
 * interrupt context.
 */
void emergency_restart(void)
{
    /* Hand straight over to the machine-level emergency path. */
    machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);
286
287void kernel_restart_prepare(char *cmd)
288{
289    blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
290    system_state = SYSTEM_RESTART;
291    device_shutdown();
292    sysdev_shutdown();
293}
294
295/**
296 * kernel_restart - reboot the system
297 * @cmd: pointer to buffer containing command to execute for restart
298 * or %NULL
299 *
300 * Shutdown everything and perform a clean reboot.
301 * This is not safe to call in interrupt context.
302 */
303void kernel_restart(char *cmd)
304{
305    kernel_restart_prepare(cmd);
306    if (!cmd)
307        printk(KERN_EMERG "Restarting system.\n");
308    else
309        printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
310    machine_restart(cmd);
311}
312EXPORT_SYMBOL_GPL(kernel_restart);
313
314static void kernel_shutdown_prepare(enum system_states state)
315{
316    blocking_notifier_call_chain(&reboot_notifier_list,
317        (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
318    system_state = state;
319    device_shutdown();
320}
321/**
322 * kernel_halt - halt the system
323 *
324 * Shutdown everything and perform a clean system halt.
325 */
326void kernel_halt(void)
327{
328    kernel_shutdown_prepare(SYSTEM_HALT);
329    sysdev_shutdown();
330    printk(KERN_EMERG "System halted.\n");
331    machine_halt();
332}
333
334EXPORT_SYMBOL_GPL(kernel_halt);
335
336/**
337 * kernel_power_off - power_off the system
338 *
339 * Shutdown everything and perform a clean system power_off.
340 */
341void kernel_power_off(void)
342{
343    kernel_shutdown_prepare(SYSTEM_POWER_OFF);
344    if (pm_power_off_prepare)
345        pm_power_off_prepare();
346    disable_nonboot_cpus();
347    sysdev_shutdown();
348    printk(KERN_EMERG "Power down.\n");
349    machine_power_off();
350}
351EXPORT_SYMBOL_GPL(kernel_power_off);
352/*
353 * Reboot system call: for obvious reasons only root may call it,
354 * and even root needs to set up some magic numbers in the registers
355 * so that some mistake won't make this reboot the whole machine.
356 * You can also set the meaning of the ctrl-alt-del-key here.
357 *
358 * reboot doesn't sync: do that yourself before calling this.
359 */
360SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
361        void __user *, arg)
362{
363    char buffer[256];
364    int ret = 0;
365
366    /* We only trust the superuser with rebooting the system. */
367    if (!capable(CAP_SYS_BOOT))
368        return -EPERM;
369
370    /* For safety, we require "magic" arguments. */
371    if (magic1 != LINUX_REBOOT_MAGIC1 ||
372        (magic2 != LINUX_REBOOT_MAGIC2 &&
373                    magic2 != LINUX_REBOOT_MAGIC2A &&
374            magic2 != LINUX_REBOOT_MAGIC2B &&
375                    magic2 != LINUX_REBOOT_MAGIC2C))
376        return -EINVAL;
377
378    /* Instead of trying to make the power_off code look like
379     * halt when pm_power_off is not set do it the easy way.
380     */
381    if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
382        cmd = LINUX_REBOOT_CMD_HALT;
383
384    lock_kernel();
385    switch (cmd) {
386    case LINUX_REBOOT_CMD_RESTART:
387        kernel_restart(NULL);
388        break;
389
390    case LINUX_REBOOT_CMD_CAD_ON:
391        C_A_D = 1;
392        break;
393
394    case LINUX_REBOOT_CMD_CAD_OFF:
395        C_A_D = 0;
396        break;
397
398    case LINUX_REBOOT_CMD_HALT:
399        kernel_halt();
400        unlock_kernel();
401        do_exit(0);
402        panic("cannot halt");
403
404    case LINUX_REBOOT_CMD_POWER_OFF:
405        kernel_power_off();
406        unlock_kernel();
407        do_exit(0);
408        break;
409
410    case LINUX_REBOOT_CMD_RESTART2:
411        if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
412            unlock_kernel();
413            return -EFAULT;
414        }
415        buffer[sizeof(buffer) - 1] = '\0';
416
417        kernel_restart(buffer);
418        break;
419
420#ifdef CONFIG_KEXEC
421    case LINUX_REBOOT_CMD_KEXEC:
422        ret = kernel_kexec();
423        break;
424#endif
425
426#ifdef CONFIG_HIBERNATION
427    case LINUX_REBOOT_CMD_SW_SUSPEND:
428        ret = hibernate();
429        break;
430#endif
431
432    default:
433        ret = -EINVAL;
434        break;
435    }
436    unlock_kernel();
437    return ret;
438}
439
440static void deferred_cad(struct work_struct *dummy)
441{
442    kernel_restart(NULL);
443}
444
445/*
446 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
447 * As it's called within an interrupt, it may NOT sync: the only choice
448 * is whether to reboot at once, or just ignore the ctrl-alt-del.
449 */
450void ctrl_alt_del(void)
451{
452    static DECLARE_WORK(cad_work, deferred_cad);
453
454    if (C_A_D)
455        schedule_work(&cad_work);
456    else
457        kill_cad_pid(SIGINT, 1);
458}
459    
460/*
461 * Unprivileged users may change the real gid to the effective gid
462 * or vice versa. (BSD-style)
463 *
464 * If you set the real gid at all, or set the effective gid to a value not
465 * equal to the real gid, then the saved gid is set to the new effective gid.
466 *
467 * This makes it possible for a setgid program to completely drop its
468 * privileges, which is often a useful assertion to make when you are doing
469 * a security audit over a program.
470 *
471 * The general idea is that a program which uses just setregid() will be
472 * 100% compatible with BSD. A program which uses just setgid() will be
473 * 100% compatible with POSIX with saved IDs.
474 *
475 * SMP: There are not races, the GIDs are checked only by filesystem
476 * operations (as far as semantic preservation is concerned).
477 */
478SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
479{
480    const struct cred *old;
481    struct cred *new;
482    int retval;
483
484    new = prepare_creds();
485    if (!new)
486        return -ENOMEM;
487    old = current_cred();
488
489    retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
490    if (retval)
491        goto error;
492
493    retval = -EPERM;
494    if (rgid != (gid_t) -1) {
495        if (old->gid == rgid ||
496            old->egid == rgid ||
497            capable(CAP_SETGID))
498            new->gid = rgid;
499        else
500            goto error;
501    }
502    if (egid != (gid_t) -1) {
503        if (old->gid == egid ||
504            old->egid == egid ||
505            old->sgid == egid ||
506            capable(CAP_SETGID))
507            new->egid = egid;
508        else
509            goto error;
510    }
511
512    if (rgid != (gid_t) -1 ||
513        (egid != (gid_t) -1 && egid != old->gid))
514        new->sgid = new->egid;
515    new->fsgid = new->egid;
516
517    return commit_creds(new);
518
519error:
520    abort_creds(new);
521    return retval;
522}
523
524/*
525 * setgid() is implemented like SysV w/ SAVED_IDS
526 *
527 * SMP: Same implicit races as above.
528 */
529SYSCALL_DEFINE1(setgid, gid_t, gid)
530{
531    const struct cred *old;
532    struct cred *new;
533    int retval;
534
535    new = prepare_creds();
536    if (!new)
537        return -ENOMEM;
538    old = current_cred();
539
540    retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
541    if (retval)
542        goto error;
543
544    retval = -EPERM;
545    if (capable(CAP_SETGID))
546        new->gid = new->egid = new->sgid = new->fsgid = gid;
547    else if (gid == old->gid || gid == old->sgid)
548        new->egid = new->fsgid = gid;
549    else
550        goto error;
551
552    return commit_creds(new);
553
554error:
555    abort_creds(new);
556    return retval;
557}
558
559/*
560 * change the user struct in a credentials set to match the new UID
561 */
562static int set_user(struct cred *new)
563{
564    struct user_struct *new_user;
565
566    new_user = alloc_uid(current_user_ns(), new->uid);
567    if (!new_user)
568        return -EAGAIN;
569
570    if (!task_can_switch_user(new_user, current)) {
571        free_uid(new_user);
572        return -EINVAL;
573    }
574
575    if (atomic_read(&new_user->processes) >=
576                current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
577            new_user != INIT_USER) {
578        free_uid(new_user);
579        return -EAGAIN;
580    }
581
582    free_uid(new->user);
583    new->user = new_user;
584    return 0;
585}
586
587/*
588 * Unprivileged users may change the real uid to the effective uid
589 * or vice versa. (BSD-style)
590 *
591 * If you set the real uid at all, or set the effective uid to a value not
592 * equal to the real uid, then the saved uid is set to the new effective uid.
593 *
594 * This makes it possible for a setuid program to completely drop its
595 * privileges, which is often a useful assertion to make when you are doing
596 * a security audit over a program.
597 *
598 * The general idea is that a program which uses just setreuid() will be
599 * 100% compatible with BSD. A program which uses just setuid() will be
600 * 100% compatible with POSIX with saved IDs.
601 */
602SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
603{
604    const struct cred *old;
605    struct cred *new;
606    int retval;
607
608    new = prepare_creds();
609    if (!new)
610        return -ENOMEM;
611    old = current_cred();
612
613    retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
614    if (retval)
615        goto error;
616
617    retval = -EPERM;
618    if (ruid != (uid_t) -1) {
619        new->uid = ruid;
620        if (old->uid != ruid &&
621            old->euid != ruid &&
622            !capable(CAP_SETUID))
623            goto error;
624    }
625
626    if (euid != (uid_t) -1) {
627        new->euid = euid;
628        if (old->uid != euid &&
629            old->euid != euid &&
630            old->suid != euid &&
631            !capable(CAP_SETUID))
632            goto error;
633    }
634
635    if (new->uid != old->uid) {
636        retval = set_user(new);
637        if (retval < 0)
638            goto error;
639    }
640    if (ruid != (uid_t) -1 ||
641        (euid != (uid_t) -1 && euid != old->uid))
642        new->suid = new->euid;
643    new->fsuid = new->euid;
644
645    retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
646    if (retval < 0)
647        goto error;
648
649    return commit_creds(new);
650
651error:
652    abort_creds(new);
653    return retval;
654}
655        
656/*
657 * setuid() is implemented like SysV with SAVED_IDS
658 *
659 * Note that SAVED_ID's is deficient in that a setuid root program
660 * like sendmail, for example, cannot set its uid to be a normal
661 * user and then switch back, because if you're root, setuid() sets
662 * the saved uid too. If you don't like this, blame the bright people
663 * in the POSIX committee and/or USG. Note that the BSD-style setreuid()
664 * will allow a root program to temporarily drop privileges and be able to
665 * regain them by swapping the real and effective uid.
666 */
667SYSCALL_DEFINE1(setuid, uid_t, uid)
668{
669    const struct cred *old;
670    struct cred *new;
671    int retval;
672
673    new = prepare_creds();
674    if (!new)
675        return -ENOMEM;
676    old = current_cred();
677
678    retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
679    if (retval)
680        goto error;
681
682    retval = -EPERM;
683    if (capable(CAP_SETUID)) {
684        new->suid = new->uid = uid;
685        if (uid != old->uid) {
686            retval = set_user(new);
687            if (retval < 0)
688                goto error;
689        }
690    } else if (uid != old->uid && uid != new->suid) {
691        goto error;
692    }
693
694    new->fsuid = new->euid = uid;
695
696    retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
697    if (retval < 0)
698        goto error;
699
700    return commit_creds(new);
701
702error:
703    abort_creds(new);
704    return retval;
705}
706
707
708/*
709 * This function implements a generic ability to update ruid, euid,
710 * and suid. This allows you to implement the 4.4 compatible seteuid().
711 */
712SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
713{
714    const struct cred *old;
715    struct cred *new;
716    int retval;
717
718    new = prepare_creds();
719    if (!new)
720        return -ENOMEM;
721
722    retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
723    if (retval)
724        goto error;
725    old = current_cred();
726
727    retval = -EPERM;
728    if (!capable(CAP_SETUID)) {
729        if (ruid != (uid_t) -1 && ruid != old->uid &&
730            ruid != old->euid && ruid != old->suid)
731            goto error;
732        if (euid != (uid_t) -1 && euid != old->uid &&
733            euid != old->euid && euid != old->suid)
734            goto error;
735        if (suid != (uid_t) -1 && suid != old->uid &&
736            suid != old->euid && suid != old->suid)
737            goto error;
738    }
739
740    if (ruid != (uid_t) -1) {
741        new->uid = ruid;
742        if (ruid != old->uid) {
743            retval = set_user(new);
744            if (retval < 0)
745                goto error;
746        }
747    }
748    if (euid != (uid_t) -1)
749        new->euid = euid;
750    if (suid != (uid_t) -1)
751        new->suid = suid;
752    new->fsuid = new->euid;
753
754    retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
755    if (retval < 0)
756        goto error;
757
758    return commit_creds(new);
759
760error:
761    abort_creds(new);
762    return retval;
763}
764
765SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid)
766{
767    const struct cred *cred = current_cred();
768    int retval;
769
770    if (!(retval = put_user(cred->uid, ruid)) &&
771        !(retval = put_user(cred->euid, euid)))
772        retval = put_user(cred->suid, suid);
773
774    return retval;
775}
776
777/*
778 * Same as above, but for rgid, egid, sgid.
779 */
780SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
781{
782    const struct cred *old;
783    struct cred *new;
784    int retval;
785
786    new = prepare_creds();
787    if (!new)
788        return -ENOMEM;
789    old = current_cred();
790
791    retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
792    if (retval)
793        goto error;
794
795    retval = -EPERM;
796    if (!capable(CAP_SETGID)) {
797        if (rgid != (gid_t) -1 && rgid != old->gid &&
798            rgid != old->egid && rgid != old->sgid)
799            goto error;
800        if (egid != (gid_t) -1 && egid != old->gid &&
801            egid != old->egid && egid != old->sgid)
802            goto error;
803        if (sgid != (gid_t) -1 && sgid != old->gid &&
804            sgid != old->egid && sgid != old->sgid)
805            goto error;
806    }
807
808    if (rgid != (gid_t) -1)
809        new->gid = rgid;
810    if (egid != (gid_t) -1)
811        new->egid = egid;
812    if (sgid != (gid_t) -1)
813        new->sgid = sgid;
814    new->fsgid = new->egid;
815
816    return commit_creds(new);
817
818error:
819    abort_creds(new);
820    return retval;
821}
822
823SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid)
824{
825    const struct cred *cred = current_cred();
826    int retval;
827
828    if (!(retval = put_user(cred->gid, rgid)) &&
829        !(retval = put_user(cred->egid, egid)))
830        retval = put_user(cred->sgid, sgid);
831
832    return retval;
833}
834
835
836/*
837 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
838 * is used for "access()" and for the NFS daemon (letting nfsd stay at
839 * whatever uid it wants to). It normally shadows "euid", except when
840 * explicitly set by setfsuid() or for access..
841 */
842SYSCALL_DEFINE1(setfsuid, uid_t, uid)
843{
844    const struct cred *old;
845    struct cred *new;
846    uid_t old_fsuid;
847
848    new = prepare_creds();
849    if (!new)
850        return current_fsuid();
851    old = current_cred();
852    old_fsuid = old->fsuid;
853
854    if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
855        goto error;
856
857    if (uid == old->uid || uid == old->euid ||
858        uid == old->suid || uid == old->fsuid ||
859        capable(CAP_SETUID)) {
860        if (uid != old_fsuid) {
861            new->fsuid = uid;
862            if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
863                goto change_okay;
864        }
865    }
866
867error:
868    abort_creds(new);
869    return old_fsuid;
870
871change_okay:
872    commit_creds(new);
873    return old_fsuid;
874}
875
876/*
877 * Samma pÃ¥ svenska..
878 */
879SYSCALL_DEFINE1(setfsgid, gid_t, gid)
880{
881    const struct cred *old;
882    struct cred *new;
883    gid_t old_fsgid;
884
885    new = prepare_creds();
886    if (!new)
887        return current_fsgid();
888    old = current_cred();
889    old_fsgid = old->fsgid;
890
891    if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
892        goto error;
893
894    if (gid == old->gid || gid == old->egid ||
895        gid == old->sgid || gid == old->fsgid ||
896        capable(CAP_SETGID)) {
897        if (gid != old_fsgid) {
898            new->fsgid = gid;
899            goto change_okay;
900        }
901    }
902
903error:
904    abort_creds(new);
905    return old_fsgid;
906
907change_okay:
908    commit_creds(new);
909    return old_fsgid;
910}
911
912void do_sys_times(struct tms *tms)
913{
914    struct task_cputime cputime;
915    cputime_t cutime, cstime;
916
917    thread_group_cputime(current, &cputime);
918    spin_lock_irq(&current->sighand->siglock);
919    cutime = current->signal->cutime;
920    cstime = current->signal->cstime;
921    spin_unlock_irq(&current->sighand->siglock);
922    tms->tms_utime = cputime_to_clock_t(cputime.utime);
923    tms->tms_stime = cputime_to_clock_t(cputime.stime);
924    tms->tms_cutime = cputime_to_clock_t(cutime);
925    tms->tms_cstime = cputime_to_clock_t(cstime);
926}
927
928SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
929{
930    if (tbuf) {
931        struct tms tmp;
932
933        do_sys_times(&tmp);
934        if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
935            return -EFAULT;
936    }
937    force_successful_syscall_return();
938    return (long) jiffies_64_to_clock_t(get_jiffies_64());
939}
940
941/*
942 * This needs some heavy checking ...
943 * I just haven't the stomach for it. I also don't fully
944 * understand sessions/pgrp etc. Let somebody who does explain it.
945 *
946 * OK, I think I have the protection semantics right.... this is really
947 * only important on a multi-user system anyway, to make sure one user
948 * can't send a signal to a process owned by another. -TYT, 12/12/91
949 *
950 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
951 * LBT 04.03.94
952 */
953SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
954{
955    struct task_struct *p;
956    struct task_struct *group_leader = current->group_leader;
957    struct pid *pgrp;
958    int err;
959
960    if (!pid)
961        pid = task_pid_vnr(group_leader);
962    if (!pgid)
963        pgid = pid;
964    if (pgid < 0)
965        return -EINVAL;
966
967    /* From this point forward we keep holding onto the tasklist lock
968     * so that our parent does not change from under us. -DaveM
969     */
970    write_lock_irq(&tasklist_lock);
971
972    err = -ESRCH;
973    p = find_task_by_vpid(pid);
974    if (!p)
975        goto out;
976
977    err = -EINVAL;
978    if (!thread_group_leader(p))
979        goto out;
980
981    if (same_thread_group(p->real_parent, group_leader)) {
982        err = -EPERM;
983        if (task_session(p) != task_session(group_leader))
984            goto out;
985        err = -EACCES;
986        if (p->did_exec)
987            goto out;
988    } else {
989        err = -ESRCH;
990        if (p != group_leader)
991            goto out;
992    }
993
994    err = -EPERM;
995    if (p->signal->leader)
996        goto out;
997
998    pgrp = task_pid(p);
999    if (pgid != pid) {
1000        struct task_struct *g;
1001
1002        pgrp = find_vpid(pgid);
1003        g = pid_task(pgrp, PIDTYPE_PGID);
1004        if (!g || task_session(g) != task_session(group_leader))
1005            goto out;
1006    }
1007
1008    err = security_task_setpgid(p, pgid);
1009    if (err)
1010        goto out;
1011
1012    if (task_pgrp(p) != pgrp)
1013        change_pid(p, PIDTYPE_PGID, pgrp);
1014
1015    err = 0;
1016out:
1017    /* All paths lead to here, thus we are safe. -DaveM */
1018    write_unlock_irq(&tasklist_lock);
1019    return err;
1020}
1021
1022SYSCALL_DEFINE1(getpgid, pid_t, pid)
1023{
1024    struct task_struct *p;
1025    struct pid *grp;
1026    int retval;
1027
1028    rcu_read_lock();
1029    if (!pid)
1030        grp = task_pgrp(current);
1031    else {
1032        retval = -ESRCH;
1033        p = find_task_by_vpid(pid);
1034        if (!p)
1035            goto out;
1036        grp = task_pgrp(p);
1037        if (!grp)
1038            goto out;
1039
1040        retval = security_task_getpgid(p);
1041        if (retval)
1042            goto out;
1043    }
1044    retval = pid_vnr(grp);
1045out:
1046    rcu_read_unlock();
1047    return retval;
1048}
1049
1050#ifdef __ARCH_WANT_SYS_GETPGRP
1051
/* getpgrp() is getpgid() with a pid of 0. */
SYSCALL_DEFINE0(getpgrp)
{
    return sys_getpgid(0);
}
1056
1057#endif
1058
1059SYSCALL_DEFINE1(getsid, pid_t, pid)
1060{
1061    struct task_struct *p;
1062    struct pid *sid;
1063    int retval;
1064
1065    rcu_read_lock();
1066    if (!pid)
1067        sid = task_session(current);
1068    else {
1069        retval = -ESRCH;
1070        p = find_task_by_vpid(pid);
1071        if (!p)
1072            goto out;
1073        sid = task_session(p);
1074        if (!sid)
1075            goto out;
1076
1077        retval = security_task_getsid(p);
1078        if (retval)
1079            goto out;
1080    }
1081    retval = pid_vnr(sid);
1082out:
1083    rcu_read_unlock();
1084    return retval;
1085}
1086
1087SYSCALL_DEFINE0(setsid)
1088{
1089    struct task_struct *group_leader = current->group_leader;
1090    struct pid *sid = task_pid(group_leader);
1091    pid_t session = pid_vnr(sid);
1092    int err = -EPERM;
1093
1094    write_lock_irq(&tasklist_lock);
1095    /* Fail if I am already a session leader */
1096    if (group_leader->signal->leader)
1097        goto out;
1098
1099    /* Fail if a process group id already exists that equals the
1100     * proposed session id.
1101     */
1102    if (pid_task(sid, PIDTYPE_PGID))
1103        goto out;
1104
1105    group_leader->signal->leader = 1;
1106    __set_special_pids(sid);
1107
1108    proc_clear_tty(group_leader);
1109
1110    err = session;
1111out:
1112    write_unlock_irq(&tasklist_lock);
1113    if (err > 0)
1114        proc_sid_connector(group_leader);
1115    return err;
1116}
1117
1118DECLARE_RWSEM(uts_sem);
1119
1120SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
1121{
1122    int errno = 0;
1123
1124    down_read(&uts_sem);
1125    if (copy_to_user(name, utsname(), sizeof *name))
1126        errno = -EFAULT;
1127    up_read(&uts_sem);
1128    return errno;
1129}
1130
1131SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1132{
1133    int errno;
1134    char tmp[__NEW_UTS_LEN];
1135
1136    if (!capable(CAP_SYS_ADMIN))
1137        return -EPERM;
1138    if (len < 0 || len > __NEW_UTS_LEN)
1139        return -EINVAL;
1140    down_write(&uts_sem);
1141    errno = -EFAULT;
1142    if (!copy_from_user(tmp, name, len)) {
1143        struct new_utsname *u = utsname();
1144
1145        memcpy(u->nodename, tmp, len);
1146        memset(u->nodename + len, 0, sizeof(u->nodename) - len);
1147        errno = 0;
1148    }
1149    up_write(&uts_sem);
1150    return errno;
1151}
1152
1153#ifdef __ARCH_WANT_SYS_GETHOSTNAME
1154
1155SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
1156{
1157    int i, errno;
1158    struct new_utsname *u;
1159
1160    if (len < 0)
1161        return -EINVAL;
1162    down_read(&uts_sem);
1163    u = utsname();
1164    i = 1 + strlen(u->nodename);
1165    if (i > len)
1166        i = len;
1167    errno = 0;
1168    if (copy_to_user(name, u->nodename, i))
1169        errno = -EFAULT;
1170    up_read(&uts_sem);
1171    return errno;
1172}
1173
1174#endif
1175
1176/*
1177 * Only setdomainname; getdomainname can be implemented by calling
1178 * uname()
1179 */
1180SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
1181{
1182    int errno;
1183    char tmp[__NEW_UTS_LEN];
1184
1185    if (!capable(CAP_SYS_ADMIN))
1186        return -EPERM;
1187    if (len < 0 || len > __NEW_UTS_LEN)
1188        return -EINVAL;
1189
1190    down_write(&uts_sem);
1191    errno = -EFAULT;
1192    if (!copy_from_user(tmp, name, len)) {
1193        struct new_utsname *u = utsname();
1194
1195        memcpy(u->domainname, tmp, len);
1196        memset(u->domainname + len, 0, sizeof(u->domainname) - len);
1197        errno = 0;
1198    }
1199    up_write(&uts_sem);
1200    return errno;
1201}
1202
1203SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1204{
1205    if (resource >= RLIM_NLIMITS)
1206        return -EINVAL;
1207    else {
1208        struct rlimit value;
1209        task_lock(current->group_leader);
1210        value = current->signal->rlim[resource];
1211        task_unlock(current->group_leader);
1212        return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
1213    }
1214}
1215
1216#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
1217
1218/*
1219 * Back compatibility for getrlimit. Needed for some apps.
1220 */
1221 
1222SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
1223        struct rlimit __user *, rlim)
1224{
1225    struct rlimit x;
1226    if (resource >= RLIM_NLIMITS)
1227        return -EINVAL;
1228
1229    task_lock(current->group_leader);
1230    x = current->signal->rlim[resource];
1231    task_unlock(current->group_leader);
1232    if (x.rlim_cur > 0x7FFFFFFF)
1233        x.rlim_cur = 0x7FFFFFFF;
1234    if (x.rlim_max > 0x7FFFFFFF)
1235        x.rlim_max = 0x7FFFFFFF;
1236    return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1237}
1238
1239#endif
1240
1241SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1242{
1243    struct rlimit new_rlim, *old_rlim;
1244    int retval;
1245
1246    if (resource >= RLIM_NLIMITS)
1247        return -EINVAL;
1248    if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1249        return -EFAULT;
1250    if (new_rlim.rlim_cur > new_rlim.rlim_max)
1251        return -EINVAL;
1252    old_rlim = current->signal->rlim + resource;
1253    if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
1254        !capable(CAP_SYS_RESOURCE))
1255        return -EPERM;
1256    if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
1257        return -EPERM;
1258
1259    retval = security_task_setrlimit(resource, &new_rlim);
1260    if (retval)
1261        return retval;
1262
1263    if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
1264        /*
1265         * The caller is asking for an immediate RLIMIT_CPU
1266         * expiry. But we use the zero value to mean "it was
1267         * never set". So let's cheat and make it one second
1268         * instead
1269         */
1270        new_rlim.rlim_cur = 1;
1271    }
1272
1273    task_lock(current->group_leader);
1274    *old_rlim = new_rlim;
1275    task_unlock(current->group_leader);
1276
1277    if (resource != RLIMIT_CPU)
1278        goto out;
1279
1280    /*
1281     * RLIMIT_CPU handling. Note that the kernel fails to return an error
1282     * code if it rejected the user's attempt to set RLIMIT_CPU. This is a
1283     * very long-standing error, and fixing it now risks breakage of
1284     * applications, so we live with it
1285     */
1286    if (new_rlim.rlim_cur == RLIM_INFINITY)
1287        goto out;
1288
1289    update_rlimit_cpu(new_rlim.rlim_cur);
1290out:
1291    return 0;
1292}
1293
1294/*
1295 * It would make sense to put struct rusage in the task_struct,
1296 * except that would make the task_struct be *really big*. After
1297 * task_struct gets moved into malloc'ed memory, it would
1298 * make sense to do this. It will make moving the rest of the information
1299 * a lot simpler! (Which we're not doing right now because we're not
1300 * measuring them yet).
1301 *
1302 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
1303 * races with threads incrementing their own counters. But since word
1304 * reads are atomic, we either get new values or old values and we don't
1305 * care which for the sums. We always take the siglock to protect reading
1306 * the c* fields from p->signal from races with exit.c updating those
1307 * fields when reaping, so a sample either gets all the additions of a
1308 * given child after it's reaped, or none so this sample is before reaping.
1309 *
1310 * Locking:
1311 * We need to take the siglock for CHILDREN, SELF and BOTH
1312 * for the cases current multithreaded, non-current single threaded
1313 * non-current multithreaded. Thread traversal is now safe with
1314 * the siglock held.
1315 * Strictly speaking, we do not need to take the siglock if we are current and
1316 * single threaded, as no one else can take our signal_struct away, no one
1317 * else can reap the children to update signal->c* counters, and no one else
1318 * can race with the signal-> fields. If we do not take any lock, the
1319 * signal-> fields could be read out of order while another thread was just
1320 * exiting. So we should place a read memory barrier when we avoid the lock.
1321 * On the writer side, write memory barrier is implied in __exit_signal
1322 * as __exit_signal releases the siglock spinlock after updating the signal->
1323 * fields. But we don't do this yet to keep things simple.
1324 *
1325 */
1326
1327static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
1328{
1329    r->ru_nvcsw += t->nvcsw;
1330    r->ru_nivcsw += t->nivcsw;
1331    r->ru_minflt += t->min_flt;
1332    r->ru_majflt += t->maj_flt;
1333    r->ru_inblock += task_io_get_inblock(t);
1334    r->ru_oublock += task_io_get_oublock(t);
1335}
1336
1337static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1338{
1339    struct task_struct *t;
1340    unsigned long flags;
1341    cputime_t utime, stime;
1342    struct task_cputime cputime;
1343    unsigned long maxrss = 0;
1344
1345    memset((char *) r, 0, sizeof *r);
1346    utime = stime = cputime_zero;
1347
1348    if (who == RUSAGE_THREAD) {
1349        utime = task_utime(current);
1350        stime = task_stime(current);
1351        accumulate_thread_rusage(p, r);
1352        maxrss = p->signal->maxrss;
1353        goto out;
1354    }
1355
1356    if (!lock_task_sighand(p, &flags))
1357        return;
1358
1359    switch (who) {
1360        case RUSAGE_BOTH:
1361        case RUSAGE_CHILDREN:
1362            utime = p->signal->cutime;
1363            stime = p->signal->cstime;
1364            r->ru_nvcsw = p->signal->cnvcsw;
1365            r->ru_nivcsw = p->signal->cnivcsw;
1366            r->ru_minflt = p->signal->cmin_flt;
1367            r->ru_majflt = p->signal->cmaj_flt;
1368            r->ru_inblock = p->signal->cinblock;
1369            r->ru_oublock = p->signal->coublock;
1370            maxrss = p->signal->cmaxrss;
1371
1372            if (who == RUSAGE_CHILDREN)
1373                break;
1374
1375        case RUSAGE_SELF:
1376            thread_group_cputime(p, &cputime);
1377            utime = cputime_add(utime, cputime.utime);
1378            stime = cputime_add(stime, cputime.stime);
1379            r->ru_nvcsw += p->signal->nvcsw;
1380            r->ru_nivcsw += p->signal->nivcsw;
1381            r->ru_minflt += p->signal->min_flt;
1382            r->ru_majflt += p->signal->maj_flt;
1383            r->ru_inblock += p->signal->inblock;
1384            r->ru_oublock += p->signal->oublock;
1385            if (maxrss < p->signal->maxrss)
1386                maxrss = p->signal->maxrss;
1387            t = p;
1388            do {
1389                accumulate_thread_rusage(t, r);
1390                t = next_thread(t);
1391            } while (t != p);
1392            break;
1393
1394        default:
1395            BUG();
1396    }
1397    unlock_task_sighand(p, &flags);
1398
1399out:
1400    cputime_to_timeval(utime, &r->ru_utime);
1401    cputime_to_timeval(stime, &r->ru_stime);
1402
1403    if (who != RUSAGE_CHILDREN) {
1404        struct mm_struct *mm = get_task_mm(p);
1405        if (mm) {
1406            setmax_mm_hiwater_rss(&maxrss, mm);
1407            mmput(mm);
1408        }
1409    }
1410    r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
1411}
1412
1413int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1414{
1415    struct rusage r;
1416    k_getrusage(p, who, &r);
1417    return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1418}
1419
1420SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
1421{
1422    if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1423        who != RUSAGE_THREAD)
1424        return -EINVAL;
1425    return getrusage(current, who, ru);
1426}
1427
1428SYSCALL_DEFINE1(umask, int, mask)
1429{
1430    mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1431    return mask;
1432}
1433
1434SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
1435        unsigned long, arg4, unsigned long, arg5)
1436{
1437    struct task_struct *me = current;
1438    unsigned char comm[sizeof(me->comm)];
1439    long error;
1440
1441    error = security_task_prctl(option, arg2, arg3, arg4, arg5);
1442    if (error != -ENOSYS)
1443        return error;
1444
1445    error = 0;
1446    switch (option) {
1447        case PR_SET_PDEATHSIG:
1448            if (!valid_signal(arg2)) {
1449                error = -EINVAL;
1450                break;
1451            }
1452            me->pdeath_signal = arg2;
1453            error = 0;
1454            break;
1455        case PR_GET_PDEATHSIG:
1456            error = put_user(me->pdeath_signal, (int __user *)arg2);
1457            break;
1458        case PR_GET_DUMPABLE:
1459            error = get_dumpable(me->mm);
1460            break;
1461        case PR_SET_DUMPABLE:
1462            if (arg2 < 0 || arg2 > 1) {
1463                error = -EINVAL;
1464                break;
1465            }
1466            set_dumpable(me->mm, arg2);
1467            error = 0;
1468            break;
1469
1470        case PR_SET_UNALIGN:
1471            error = SET_UNALIGN_CTL(me, arg2);
1472            break;
1473        case PR_GET_UNALIGN:
1474            error = GET_UNALIGN_CTL(me, arg2);
1475            break;
1476        case PR_SET_FPEMU:
1477            error = SET_FPEMU_CTL(me, arg2);
1478            break;
1479        case PR_GET_FPEMU:
1480            error = GET_FPEMU_CTL(me, arg2);
1481            break;
1482        case PR_SET_FPEXC:
1483            error = SET_FPEXC_CTL(me, arg2);
1484            break;
1485        case PR_GET_FPEXC:
1486            error = GET_FPEXC_CTL(me, arg2);
1487            break;
1488        case PR_GET_TIMING:
1489            error = PR_TIMING_STATISTICAL;
1490            break;
1491        case PR_SET_TIMING:
1492            if (arg2 != PR_TIMING_STATISTICAL)
1493                error = -EINVAL;
1494            else
1495                error = 0;
1496            break;
1497
1498        case PR_SET_NAME:
1499            comm[sizeof(me->comm)-1] = 0;
1500            if (strncpy_from_user(comm, (char __user *)arg2,
1501                          sizeof(me->comm) - 1) < 0)
1502                return -EFAULT;
1503            set_task_comm(me, comm);
1504            return 0;
1505        case PR_GET_NAME:
1506            get_task_comm(comm, me);
1507            if (copy_to_user((char __user *)arg2, comm,
1508                     sizeof(comm)))
1509                return -EFAULT;
1510            return 0;
1511        case PR_GET_ENDIAN:
1512            error = GET_ENDIAN(me, arg2);
1513            break;
1514        case PR_SET_ENDIAN:
1515            error = SET_ENDIAN(me, arg2);
1516            break;
1517
1518        case PR_GET_SECCOMP:
1519            error = prctl_get_seccomp();
1520            break;
1521        case PR_SET_SECCOMP:
1522            error = prctl_set_seccomp(arg2);
1523            break;
1524        case PR_GET_TSC:
1525            error = GET_TSC_CTL(arg2);
1526            break;
1527        case PR_SET_TSC:
1528            error = SET_TSC_CTL(arg2);
1529            break;
1530        case PR_TASK_PERF_EVENTS_DISABLE:
1531            error = perf_event_task_disable();
1532            break;
1533        case PR_TASK_PERF_EVENTS_ENABLE:
1534            error = perf_event_task_enable();
1535            break;
1536        case PR_GET_TIMERSLACK:
1537            error = current->timer_slack_ns;
1538            break;
1539        case PR_SET_TIMERSLACK:
1540            if (arg2 <= 0)
1541                current->timer_slack_ns =
1542                    current->default_timer_slack_ns;
1543            else
1544                current->timer_slack_ns = arg2;
1545            error = 0;
1546            break;
1547        case PR_MCE_KILL:
1548            if (arg4 | arg5)
1549                return -EINVAL;
1550            switch (arg2) {
1551            case PR_MCE_KILL_CLEAR:
1552                if (arg3 != 0)
1553                    return -EINVAL;
1554                current->flags &= ~PF_MCE_PROCESS;
1555                break;
1556            case PR_MCE_KILL_SET:
1557                current->flags |= PF_MCE_PROCESS;
1558                if (arg3 == PR_MCE_KILL_EARLY)
1559                    current->flags |= PF_MCE_EARLY;
1560                else if (arg3 == PR_MCE_KILL_LATE)
1561                    current->flags &= ~PF_MCE_EARLY;
1562                else if (arg3 == PR_MCE_KILL_DEFAULT)
1563                    current->flags &=
1564                        ~(PF_MCE_EARLY|PF_MCE_PROCESS);
1565                else
1566                    return -EINVAL;
1567                break;
1568            default:
1569                return -EINVAL;
1570            }
1571            error = 0;
1572            break;
1573        case PR_MCE_KILL_GET:
1574            if (arg2 | arg3 | arg4 | arg5)
1575                return -EINVAL;
1576            if (current->flags & PF_MCE_PROCESS)
1577                error = (current->flags & PF_MCE_EARLY) ?
1578                    PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
1579            else
1580                error = PR_MCE_KILL_DEFAULT;
1581            break;
1582        default:
1583            error = -EINVAL;
1584            break;
1585    }
1586    return error;
1587}
1588
1589SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
1590        struct getcpu_cache __user *, unused)
1591{
1592    int err = 0;
1593    int cpu = raw_smp_processor_id();
1594    if (cpup)
1595        err |= put_user(cpu, cpup);
1596    if (nodep)
1597        err |= put_user(cpu_to_node(cpu), nodep);
1598    return err ? -EFAULT : 0;
1599}
1600
1601char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
1602
1603static void argv_cleanup(char **argv, char **envp)
1604{
1605    argv_free(argv);
1606}
1607
1608/**
1609 * orderly_poweroff - Trigger an orderly system poweroff
1610 * @force: force poweroff if command execution fails
1611 *
1612 * This may be called from any context to trigger a system shutdown.
1613 * If the orderly shutdown fails, it will force an immediate shutdown.
1614 */
1615int orderly_poweroff(bool force)
1616{
1617    int argc;
1618    char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
1619    static char *envp[] = {
1620        "HOME=/",
1621        "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
1622        NULL
1623    };
1624    int ret = -ENOMEM;
1625    struct subprocess_info *info;
1626
1627    if (argv == NULL) {
1628        printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
1629               __func__, poweroff_cmd);
1630        goto out;
1631    }
1632
1633    info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
1634    if (info == NULL) {
1635        argv_free(argv);
1636        goto out;
1637    }
1638
1639    call_usermodehelper_setcleanup(info, argv_cleanup);
1640
1641    ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
1642
1643  out:
1644    if (ret && force) {
1645        printk(KERN_WARNING "Failed to start orderly shutdown: "
1646               "forcing the issue\n");
1647
1648        /* I guess this should try to kick off some daemon to
1649           sync and poweroff asap. Or not even bother syncing
1650           if we're doing an emergency shutdown? */
1651        emergency_sync();
1652        kernel_power_off();
1653    }
1654
1655    return ret;
1656}
1657EXPORT_SYMBOL_GPL(orderly_poweroff);
1658

Archive Download this file



interactive