ipc/shm.c

/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 * Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 */

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>

#include <asm/uaccess.h>

#include "util.h"

struct shm_file_data {
    int id;
    struct ipc_namespace *ns;
    struct file *file;
    const struct vm_operations_struct *vm_ops;
};

#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS])

#define shm_unlock(shp) \
    ipc_unlock(&(shp)->shm_perm)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif

void shm_init_ns(struct ipc_namespace *ns)
{
    ns->shm_ctlmax = SHMMAX;
    ns->shm_ctlall = SHMALL;
    ns->shm_ctlmni = SHMMNI;
    ns->shm_rmid_forced = 0;
    ns->shm_tot = 0;
    ipc_init_ids(&shm_ids(ns));
}
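
/*
 * Editorial aside: the per-namespace limits set up above surface to userspace
 * as the kernel.shmmax, kernel.shmall, kernel.shmmni and kernel.shm_rmid_forced
 * sysctls. A minimal, illustrative userspace sketch for reading one of them
 * through the standard procfs sysctl mirror (not part of this file):
 *
 *    #include <stdio.h>
 *
 *    int main(void)
 *    {
 *        unsigned long shmmax;
 *        FILE *f = fopen("/proc/sys/kernel/shmmax", "r");
 *
 *        if (!f || fscanf(f, "%lu", &shmmax) != 1)
 *            return 1;
 *        printf("shmmax = %lu bytes\n", shmmax);
 *        fclose(f);
 *        return 0;
 *    }
 */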

/*
 * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
 * Only shm_ids.rw_mutex remains locked on exit.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
    struct shmid_kernel *shp;
    shp = container_of(ipcp, struct shmid_kernel, shm_perm);

    if (shp->shm_nattch) {
        shp->shm_perm.mode |= SHM_DEST;
        /* Do not find it any more */
        shp->shm_perm.key = IPC_PRIVATE;
        shm_unlock(shp);
    } else
        shm_destroy(ns, shp);
}

#ifdef CONFIG_IPC_NS
void shm_exit_ns(struct ipc_namespace *ns)
{
    free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
    idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
}
#endif

static int __init ipc_ns_init(void)
{
    shm_init_ns(&init_ipc_ns);
    return 0;
}

pure_initcall(ipc_ns_init);

void __init shm_init(void)
{
    ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
                "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
#else
                "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
#endif
                IPC_SHM_IDS, sysvipc_shm_proc_show);
}

/*
 * shm_lock_(check_) routines are called in the paths where the rw_mutex
 * is not necessarily held.
 */
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
    struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);

    if (IS_ERR(ipcp))
        return (struct shmid_kernel *)ipcp;

    return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
    rcu_read_lock();
    spin_lock(&ipcp->shm_perm.lock);
}

static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
                        int id)
{
    struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);

    if (IS_ERR(ipcp))
        return (struct shmid_kernel *)ipcp;

    return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
    ipc_rmid(&shm_ids(ns), &s->shm_perm);
}


/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);
    struct shmid_kernel *shp;

    shp = shm_lock(sfd->ns, sfd->id);
    BUG_ON(IS_ERR(shp));
    shp->shm_atim = get_seconds();
    shp->shm_lprid = task_tgid_vnr(current);
    shp->shm_nattch++;
    shm_unlock(shp);
}

/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
    ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
    shm_rmid(ns, shp);
    shm_unlock(shp);
    if (!is_file_hugepages(shp->shm_file))
        shmem_lock(shp->shm_file, 0, shp->mlock_user);
    else if (shp->mlock_user)
        user_shm_unlock(file_inode(shp->shm_file)->i_size,
                        shp->mlock_user);
    fput(shp->shm_file);
    security_shm_free(shp);
    ipc_rcu_putref(shp);
}
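
/*
 * Editorial aside: the ns->shm_tot bookkeeping above mirrors the rounding
 * done at creation time in newseg(): a segment's charge is its size rounded
 * up to whole pages. A worked example, assuming 4 KiB pages:
 *
 *    shm_segsz = 10000 bytes
 *    pages     = (10000 + 4096 - 1) >> 12 = 14095 >> 12 = 3 pages
 *
 * so destroying that segment returns 3 pages to the namespace total.
 */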

/*
 * shm_may_destroy - identifies whether shm segment should be destroyed now
 *
 * Returns true if and only if there are no active users of the segment and
 * one of the following is true:
 *
 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
 *
 * 2) sysctl kernel.shm_rmid_forced is set to 1.
 */
static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
    return (shp->shm_nattch == 0) &&
           (ns->shm_rmid_forced ||
            (shp->shm_perm.mode & SHM_DEST));
}
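
/*
 * Editorial aside: a quick decision table for shm_may_destroy(), derived
 * directly from the expression above:
 *
 *    nattch  shm_rmid_forced  SHM_DEST  -> destroy now?
 *      >0        any            any        no  (still attached)
 *       0         1             any        yes
 *       0         0             set        yes (IPC_RMID was requested)
 *       0         0             clear      no  (segment persists)
 */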

/*
 * remove the attach descriptor vma.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);
    struct shmid_kernel *shp;
    struct ipc_namespace *ns = sfd->ns;

    down_write(&shm_ids(ns).rw_mutex);
    /* remove from the list of attaches of the shm segment */
    shp = shm_lock(ns, sfd->id);
    BUG_ON(IS_ERR(shp));
    shp->shm_lprid = task_tgid_vnr(current);
    shp->shm_dtim = get_seconds();
    shp->shm_nattch--;
    if (shm_may_destroy(ns, shp))
        shm_destroy(ns, shp);
    else
        shm_unlock(shp);
    up_write(&shm_ids(ns).rw_mutex);
}

/* Called with ns->shm_ids(ns).rw_mutex locked */
static int shm_try_destroy_current(int id, void *p, void *data)
{
    struct ipc_namespace *ns = data;
    struct kern_ipc_perm *ipcp = p;
    struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

    if (shp->shm_creator != current)
        return 0;

    /*
     * Mark it as orphaned to destroy the segment when
     * kernel.shm_rmid_forced is changed.
     * It is a no-op if the following shm_may_destroy() returns true.
     */
    shp->shm_creator = NULL;

    /*
     * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
     * is not set, it shouldn't be deleted here.
     */
    if (!ns->shm_rmid_forced)
        return 0;

    if (shm_may_destroy(ns, shp)) {
        shm_lock_by_ptr(shp);
        shm_destroy(ns, shp);
    }
    return 0;
}

/* Called with ns->shm_ids(ns).rw_mutex locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
    struct ipc_namespace *ns = data;
    struct kern_ipc_perm *ipcp = p;
    struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

    /*
     * We want to destroy segments without users and whose originating
     * process has already exited.
     *
     * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
     */
    if (shp->shm_creator != NULL)
        return 0;

    if (shm_may_destroy(ns, shp)) {
        shm_lock_by_ptr(shp);
        shm_destroy(ns, shp);
    }
    return 0;
}

void shm_destroy_orphaned(struct ipc_namespace *ns)
{
    down_write(&shm_ids(ns).rw_mutex);
    if (shm_ids(ns).in_use)
        idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
    up_write(&shm_ids(ns).rw_mutex);
}


void exit_shm(struct task_struct *task)
{
    struct ipc_namespace *ns = task->nsproxy->ipc_ns;

    if (shm_ids(ns).in_use == 0)
        return;

    /* Destroy all segments this task created that are not yet mapped */
    down_write(&shm_ids(ns).rw_mutex);
    if (shm_ids(ns).in_use)
        idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
    up_write(&shm_ids(ns).rw_mutex);
}
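
/*
 * Editorial aside: exit_shm() and shm_destroy_orphaned() implement the
 * kernel.shm_rmid_forced semantics. A minimal, illustrative sketch of how
 * an administrator-side program might flip the switch (requires suitable
 * privileges; illustrative only, not part of this file):
 *
 *    #include <stdio.h>
 *
 *    int main(void)
 *    {
 *        FILE *f = fopen("/proc/sys/kernel/shm_rmid_forced", "w");
 *
 *        if (!f)
 *            return 1;
 *        fputs("1\n", f);  // orphaned, unattached segments get destroyed
 *        fclose(f);
 *        return 0;
 *    }
 */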

static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);

    return sfd->vm_ops->fault(vma, vmf);
}

#ifdef CONFIG_NUMA
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);
    int err = 0;

    if (sfd->vm_ops->set_policy)
        err = sfd->vm_ops->set_policy(vma, new);
    return err;
}

static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
                    unsigned long addr)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);
    struct mempolicy *pol = NULL;

    if (sfd->vm_ops->get_policy)
        pol = sfd->vm_ops->get_policy(vma, addr);
    else if (vma->vm_policy)
        pol = vma->vm_policy;

    return pol;
}
#endif

static int shm_mmap(struct file *file, struct vm_area_struct *vma)
{
    struct shm_file_data *sfd = shm_file_data(file);
    int ret;

    ret = sfd->file->f_op->mmap(sfd->file, vma);
    if (ret != 0)
        return ret;
    sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
    BUG_ON(!sfd->vm_ops->fault);
#endif
    vma->vm_ops = &shm_vm_ops;
    shm_open(vma);

    return ret;
}

static int shm_release(struct inode *ino, struct file *file)
{
    struct shm_file_data *sfd = shm_file_data(file);

    put_ipc_ns(sfd->ns);
    shm_file_data(file) = NULL;
    kfree(sfd);
    return 0;
}

static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
    struct shm_file_data *sfd = shm_file_data(file);

    if (!sfd->file->f_op->fsync)
        return -EINVAL;
    return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
}

static long shm_fallocate(struct file *file, int mode, loff_t offset,
              loff_t len)
{
    struct shm_file_data *sfd = shm_file_data(file);

    if (!sfd->file->f_op->fallocate)
        return -EOPNOTSUPP;
    return sfd->file->f_op->fallocate(file, mode, offset, len);
}

static unsigned long shm_get_unmapped_area(struct file *file,
    unsigned long addr, unsigned long len, unsigned long pgoff,
    unsigned long flags)
{
    struct shm_file_data *sfd = shm_file_data(file);
    return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
                        pgoff, flags);
}

static const struct file_operations shm_file_operations = {
    .mmap = shm_mmap,
    .fsync = shm_fsync,
    .release = shm_release,
#ifndef CONFIG_MMU
    .get_unmapped_area = shm_get_unmapped_area,
#endif
    .llseek = noop_llseek,
    .fallocate = shm_fallocate,
};

static const struct file_operations shm_file_operations_huge = {
    .mmap = shm_mmap,
    .fsync = shm_fsync,
    .release = shm_release,
    .get_unmapped_area = shm_get_unmapped_area,
    .llseek = noop_llseek,
    .fallocate = shm_fallocate,
};

int is_file_shm_hugepages(struct file *file)
{
    return file->f_op == &shm_file_operations_huge;
}

static const struct vm_operations_struct shm_vm_ops = {
    .open = shm_open, /* callback for a new vm-area open */
    .close = shm_close, /* callback for when the vm-area is released */
    .fault = shm_fault,
#if defined(CONFIG_NUMA)
    .set_policy = shm_set_policy,
    .get_policy = shm_get_policy,
#endif
};

/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rw_mutex held as a writer.
 */

static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
    key_t key = params->key;
    int shmflg = params->flg;
    size_t size = params->u.size;
    int error;
    struct shmid_kernel *shp;
    int numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
    struct file *file;
    char name[13];
    int id;
    vm_flags_t acctflag = 0;

    if (size < SHMMIN || size > ns->shm_ctlmax)
        return -EINVAL;

    if (ns->shm_tot + numpages > ns->shm_ctlall)
        return -ENOSPC;

    shp = ipc_rcu_alloc(sizeof(*shp));
    if (!shp)
        return -ENOMEM;

    shp->shm_perm.key = key;
    shp->shm_perm.mode = (shmflg & S_IRWXUGO);
    shp->mlock_user = NULL;

    shp->shm_perm.security = NULL;
    error = security_shm_alloc(shp);
    if (error) {
        ipc_rcu_putref(shp);
        return error;
    }

    sprintf(name, "SYSV%08x", key);
    if (shmflg & SHM_HUGETLB) {
        /* hugetlb_file_setup applies strict accounting */
        if (shmflg & SHM_NORESERVE)
            acctflag = VM_NORESERVE;
        file = hugetlb_file_setup(name, 0, size, acctflag,
                  &shp->mlock_user, HUGETLB_SHMFS_INODE,
                (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
    } else {
        /*
         * Do not allow disabling accounting (SHM_NORESERVE) when the
         * overcommit policy is OVERCOMMIT_NEVER, even if it's asked for.
         */
        if ((shmflg & SHM_NORESERVE) &&
                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
            acctflag = VM_NORESERVE;
        file = shmem_file_setup(name, size, acctflag);
    }
    error = PTR_ERR(file);
    if (IS_ERR(file))
        goto no_file;

    id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
    if (id < 0) {
        error = id;
        goto no_id;
    }

    shp->shm_cprid = task_tgid_vnr(current);
    shp->shm_lprid = 0;
    shp->shm_atim = shp->shm_dtim = 0;
    shp->shm_ctim = get_seconds();
    shp->shm_segsz = size;
    shp->shm_nattch = 0;
    shp->shm_file = file;
    shp->shm_creator = current;
    /*
     * shmid gets reported as "inode#" in /proc/pid/maps.
     * proc-ps tools use this. Changing this will break them.
     */
    file_inode(file)->i_ino = shp->shm_perm.id;

    ns->shm_tot += numpages;
    error = shp->shm_perm.id;
    shm_unlock(shp);
    return error;

no_id:
    if (is_file_hugepages(file) && shp->mlock_user)
        user_shm_unlock(size, shp->mlock_user);
    fput(file);
no_file:
    security_shm_free(shp);
    ipc_rcu_putref(shp);
    return error;
}
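
/*
 * Editorial aside: the (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK expression
 * above decodes a caller-selected huge page size from the shmget() flags.
 * A hedged userspace sketch, assuming the SHM_HUGE_* uapi constants of this
 * era (SHM_HUGE_SHIFT == 26; a 2 MB page size is log2(2M) == 21 shifted into
 * place) -- illustrative only:
 *
 *    #include <sys/ipc.h>
 *    #include <sys/shm.h>
 *
 *    #ifndef SHM_HUGE_SHIFT
 *    #define SHM_HUGE_SHIFT 26                 // assumption: uapi value
 *    #endif
 *    #ifndef SHM_HUGE_2MB
 *    #define SHM_HUGE_2MB   (21 << SHM_HUGE_SHIFT)
 *    #endif
 *
 *    int create_huge_seg(void)
 *    {
 *        // 8 MB segment backed by 2 MB huge pages; needs a configured
 *        // hugetlb pool and appropriate privileges/limits.
 *        return shmget(IPC_PRIVATE, 8 << 20,
 *                      IPC_CREAT | SHM_HUGETLB | SHM_HUGE_2MB | 0600);
 *    }
 */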

/*
 * Called with shm_ids.rw_mutex and ipcp locked.
 */
static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
{
    struct shmid_kernel *shp;

    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
    return security_shm_associate(shp, shmflg);
}

/*
 * Called with shm_ids.rw_mutex and ipcp locked.
 */
static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
                struct ipc_params *params)
{
    struct shmid_kernel *shp;

    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
    if (shp->shm_segsz < params->u.size)
        return -EINVAL;

    return 0;
}

SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
    struct ipc_namespace *ns;
    struct ipc_ops shm_ops;
    struct ipc_params shm_params;

    ns = current->nsproxy->ipc_ns;

    shm_ops.getnew = newseg;
    shm_ops.associate = shm_security;
    shm_ops.more_checks = shm_more_checks;

    shm_params.key = key;
    shm_params.flg = shmflg;
    shm_params.u.size = size;

    return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
}
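
/*
 * Editorial aside: a minimal userspace counterpart to the syscall above --
 * create (or look up) a one-page segment for a fixed key. Illustrative
 * only; the key value is arbitrary:
 *
 *    #include <stdio.h>
 *    #include <sys/ipc.h>
 *    #include <sys/shm.h>
 *
 *    int main(void)
 *    {
 *        int id = shmget((key_t)0x1234, 4096, IPC_CREAT | 0600);
 *
 *        if (id < 0) {
 *            perror("shmget");
 *            return 1;
 *        }
 *        printf("shmid = %d\n", id);
 *        return 0;
 *    }
 */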

static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
    switch (version) {
    case IPC_64:
        return copy_to_user(buf, in, sizeof(*in));
    case IPC_OLD:
    {
        struct shmid_ds out;

        memset(&out, 0, sizeof(out));
        ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
        out.shm_segsz = in->shm_segsz;
        out.shm_atime = in->shm_atime;
        out.shm_dtime = in->shm_dtime;
        out.shm_ctime = in->shm_ctime;
        out.shm_cpid = in->shm_cpid;
        out.shm_lpid = in->shm_lpid;
        out.shm_nattch = in->shm_nattch;

        return copy_to_user(buf, &out, sizeof(out));
    }
    default:
        return -EINVAL;
    }
}

static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
    switch (version) {
    case IPC_64:
        if (copy_from_user(out, buf, sizeof(*out)))
            return -EFAULT;
        return 0;
    case IPC_OLD:
    {
        struct shmid_ds tbuf_old;

        if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
            return -EFAULT;

        out->shm_perm.uid = tbuf_old.shm_perm.uid;
        out->shm_perm.gid = tbuf_old.shm_perm.gid;
        out->shm_perm.mode = tbuf_old.shm_perm.mode;

        return 0;
    }
    default:
        return -EINVAL;
    }
}

static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
    switch (version) {
    case IPC_64:
        return copy_to_user(buf, in, sizeof(*in));
    case IPC_OLD:
    {
        struct shminfo out;

        if (in->shmmax > INT_MAX)
            out.shmmax = INT_MAX;
        else
            out.shmmax = (int)in->shmmax;

        out.shmmin = in->shmmin;
        out.shmmni = in->shmmni;
        out.shmseg = in->shmseg;
        out.shmall = in->shmall;

        return copy_to_user(buf, &out, sizeof(out));
    }
    default:
        return -EINVAL;
    }
}

/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rw_mutex held as a reader.
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
    unsigned long *rss_add, unsigned long *swp_add)
{
    struct inode *inode;

    inode = file_inode(shp->shm_file);

    if (is_file_hugepages(shp->shm_file)) {
        struct address_space *mapping = inode->i_mapping;
        struct hstate *h = hstate_file(shp->shm_file);
        *rss_add += pages_per_huge_page(h) * mapping->nrpages;
    } else {
#ifdef CONFIG_SHMEM
        struct shmem_inode_info *info = SHMEM_I(inode);
        spin_lock(&info->lock);
        *rss_add += inode->i_mapping->nrpages;
        *swp_add += info->swapped;
        spin_unlock(&info->lock);
#else
        *rss_add += inode->i_mapping->nrpages;
#endif
    }
}
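
/*
 * Editorial aside: for the hugetlb case above, nrpages counts pages in the
 * huge page size, so the RSS contribution is scaled back to base pages.
 * A worked example, assuming 4 KiB base pages and 2 MiB huge pages:
 *
 *    pages_per_huge_page(h) = 2 MiB / 4 KiB = 512
 *    mapping->nrpages       = 4 huge pages resident
 *    *rss_add              += 512 * 4 = 2048 base pages (8 MiB)
 */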

/*
 * Called with shm_ids.rw_mutex held as a reader.
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
        unsigned long *swp)
{
    int next_id;
    int total, in_use;

    *rss = 0;
    *swp = 0;

    in_use = shm_ids(ns).in_use;

    for (total = 0, next_id = 0; total < in_use; next_id++) {
        struct kern_ipc_perm *ipc;
        struct shmid_kernel *shp;

        ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
        if (ipc == NULL)
            continue;
        shp = container_of(ipc, struct shmid_kernel, shm_perm);

        shm_add_rss_swap(shp, rss, swp);

        total++;
    }
}

/*
 * This function handles some shmctl commands which require the rw_mutex
 * to be held in write mode.
 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
               struct shmid_ds __user *buf, int version)
{
    struct kern_ipc_perm *ipcp;
    struct shmid64_ds shmid64;
    struct shmid_kernel *shp;
    int err;

    if (cmd == IPC_SET) {
        if (copy_shmid_from_user(&shmid64, buf, version))
            return -EFAULT;
    }

    ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
                   &shmid64.shm_perm, 0);
    if (IS_ERR(ipcp))
        return PTR_ERR(ipcp);

    shp = container_of(ipcp, struct shmid_kernel, shm_perm);

    err = security_shm_shmctl(shp, cmd);
    if (err)
        goto out_unlock;
    switch (cmd) {
    case IPC_RMID:
        do_shm_rmid(ns, ipcp);
        goto out_up;
    case IPC_SET:
        err = ipc_update_perm(&shmid64.shm_perm, ipcp);
        if (err)
            goto out_unlock;
        shp->shm_ctim = get_seconds();
        break;
    default:
        err = -EINVAL;
    }
out_unlock:
    shm_unlock(shp);
out_up:
    up_write(&shm_ids(ns).rw_mutex);
    return err;
}

SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
    struct shmid_kernel *shp;
    int err, version;
    struct ipc_namespace *ns;

    if (cmd < 0 || shmid < 0) {
        err = -EINVAL;
        goto out;
    }

    version = ipc_parse_version(&cmd);
    ns = current->nsproxy->ipc_ns;

    switch (cmd) { /* replace with proc interface ? */
    case IPC_INFO:
    {
        struct shminfo64 shminfo;

        err = security_shm_shmctl(NULL, cmd);
        if (err)
            return err;

        memset(&shminfo, 0, sizeof(shminfo));
        shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
        shminfo.shmmax = ns->shm_ctlmax;
        shminfo.shmall = ns->shm_ctlall;

        shminfo.shmmin = SHMMIN;
        if (copy_shminfo_to_user(buf, &shminfo, version))
            return -EFAULT;

        down_read(&shm_ids(ns).rw_mutex);
        err = ipc_get_maxid(&shm_ids(ns));
        up_read(&shm_ids(ns).rw_mutex);

        if (err < 0)
            err = 0;
        goto out;
    }
    case SHM_INFO:
    {
        struct shm_info shm_info;

        err = security_shm_shmctl(NULL, cmd);
        if (err)
            return err;

        memset(&shm_info, 0, sizeof(shm_info));
        down_read(&shm_ids(ns).rw_mutex);
        shm_info.used_ids = shm_ids(ns).in_use;
        shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp);
        shm_info.shm_tot = ns->shm_tot;
        shm_info.swap_attempts = 0;
        shm_info.swap_successes = 0;
        err = ipc_get_maxid(&shm_ids(ns));
        up_read(&shm_ids(ns).rw_mutex);
        if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
            err = -EFAULT;
            goto out;
        }

        err = err < 0 ? 0 : err;
        goto out;
    }
    case SHM_STAT:
    case IPC_STAT:
    {
        struct shmid64_ds tbuf;
        int result;

        if (cmd == SHM_STAT) {
            shp = shm_lock(ns, shmid);
            if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
                goto out;
            }
            result = shp->shm_perm.id;
        } else {
            shp = shm_lock_check(ns, shmid);
            if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
                goto out;
            }
            result = 0;
        }
        err = -EACCES;
        if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
            goto out_unlock;
        err = security_shm_shmctl(shp, cmd);
        if (err)
            goto out_unlock;
        memset(&tbuf, 0, sizeof(tbuf));
        kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
        tbuf.shm_segsz = shp->shm_segsz;
        tbuf.shm_atime = shp->shm_atim;
        tbuf.shm_dtime = shp->shm_dtim;
        tbuf.shm_ctime = shp->shm_ctim;
        tbuf.shm_cpid = shp->shm_cprid;
        tbuf.shm_lpid = shp->shm_lprid;
        tbuf.shm_nattch = shp->shm_nattch;
        shm_unlock(shp);
        if (copy_shmid_to_user(buf, &tbuf, version))
            err = -EFAULT;
        else
            err = result;
        goto out;
    }
    case SHM_LOCK:
    case SHM_UNLOCK:
    {
        struct file *shm_file;

        shp = shm_lock_check(ns, shmid);
        if (IS_ERR(shp)) {
            err = PTR_ERR(shp);
            goto out;
        }

        audit_ipc_obj(&(shp->shm_perm));

        if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
            kuid_t euid = current_euid();
            err = -EPERM;
            if (!uid_eq(euid, shp->shm_perm.uid) &&
                !uid_eq(euid, shp->shm_perm.cuid))
                goto out_unlock;
            if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
                goto out_unlock;
        }

        err = security_shm_shmctl(shp, cmd);
        if (err)
            goto out_unlock;

        shm_file = shp->shm_file;
        if (is_file_hugepages(shm_file))
            goto out_unlock;

        if (cmd == SHM_LOCK) {
            struct user_struct *user = current_user();
            err = shmem_lock(shm_file, 1, user);
            if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
                shp->shm_perm.mode |= SHM_LOCKED;
                shp->mlock_user = user;
            }
            goto out_unlock;
        }

        /* SHM_UNLOCK */
        if (!(shp->shm_perm.mode & SHM_LOCKED))
            goto out_unlock;
        shmem_lock(shm_file, 0, shp->mlock_user);
        shp->shm_perm.mode &= ~SHM_LOCKED;
        shp->mlock_user = NULL;
        get_file(shm_file);
        shm_unlock(shp);
        shmem_unlock_mapping(shm_file->f_mapping);
        fput(shm_file);
        goto out;
    }
    case IPC_RMID:
    case IPC_SET:
        err = shmctl_down(ns, shmid, cmd, buf, version);
        return err;
    default:
        return -EINVAL;
    }

out_unlock:
    shm_unlock(shp);
out:
    return err;
}
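
/*
 * Editorial aside: the IPC_STAT path above fills a shmid64_ds that glibc
 * exposes as struct shmid_ds. A minimal userspace sketch querying a segment
 * (illustrative; "id" is assumed to come from a prior shmget()):
 *
 *    #include <stdio.h>
 *    #include <sys/ipc.h>
 *    #include <sys/shm.h>
 *
 *    int print_stat(int id)
 *    {
 *        struct shmid_ds ds;
 *
 *        if (shmctl(id, IPC_STAT, &ds) < 0) {
 *            perror("shmctl");
 *            return -1;
 *        }
 *        printf("size=%zu nattch=%lu cpid=%d\n",
 *               ds.shm_segsz, (unsigned long)ds.shm_nattch, ds.shm_cpid);
 *        return 0;
 *    }
 */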

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
          unsigned long shmlba)
{
    struct shmid_kernel *shp;
    unsigned long addr;
    unsigned long size;
    struct file *file;
    int err;
    unsigned long flags;
    unsigned long prot;
    int acc_mode;
    struct ipc_namespace *ns;
    struct shm_file_data *sfd;
    struct path path;
    fmode_t f_mode;
    unsigned long populate = 0;

    err = -EINVAL;
    if (shmid < 0)
        goto out;
    else if ((addr = (ulong)shmaddr)) {
        if (addr & (shmlba - 1)) {
            if (shmflg & SHM_RND)
                addr &= ~(shmlba - 1); /* round down */
            else
#ifndef __ARCH_FORCE_SHMLBA
                if (addr & ~PAGE_MASK)
#endif
                    goto out;
        }
        flags = MAP_SHARED | MAP_FIXED;
    } else {
        if ((shmflg & SHM_REMAP))
            goto out;

        flags = MAP_SHARED;
    }

    if (shmflg & SHM_RDONLY) {
        prot = PROT_READ;
        acc_mode = S_IRUGO;
        f_mode = FMODE_READ;
    } else {
        prot = PROT_READ | PROT_WRITE;
        acc_mode = S_IRUGO | S_IWUGO;
        f_mode = FMODE_READ | FMODE_WRITE;
    }
    if (shmflg & SHM_EXEC) {
        prot |= PROT_EXEC;
        acc_mode |= S_IXUGO;
    }

    /*
     * We cannot rely on the fs check since SYSV IPC does have an
     * additional creator id...
     */
    ns = current->nsproxy->ipc_ns;
    shp = shm_lock_check(ns, shmid);
    if (IS_ERR(shp)) {
        err = PTR_ERR(shp);
        goto out;
    }

    err = -EACCES;
    if (ipcperms(ns, &shp->shm_perm, acc_mode))
        goto out_unlock;

    err = security_shm_shmat(shp, shmaddr, shmflg);
    if (err)
        goto out_unlock;

    path = shp->shm_file->f_path;
    path_get(&path);
    shp->shm_nattch++;
    size = i_size_read(path.dentry->d_inode);
    shm_unlock(shp);

    err = -ENOMEM;
    sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
    if (!sfd)
        goto out_put_dentry;

    file = alloc_file(&path, f_mode,
              is_file_hugepages(shp->shm_file) ?
                &shm_file_operations_huge :
                &shm_file_operations);
    err = PTR_ERR(file);
    if (IS_ERR(file))
        goto out_free;

    file->private_data = sfd;
    file->f_mapping = shp->shm_file->f_mapping;
    sfd->id = shp->shm_perm.id;
    sfd->ns = get_ipc_ns(ns);
    sfd->file = shp->shm_file;
    sfd->vm_ops = NULL;

    err = security_mmap_file(file, prot, flags);
    if (err)
        goto out_fput;

    down_write(&current->mm->mmap_sem);
    if (addr && !(shmflg & SHM_REMAP)) {
        err = -EINVAL;
        if (find_vma_intersection(current->mm, addr, addr + size))
            goto invalid;
        /*
         * If shm segment goes below stack, make sure there is some
         * space left for the stack to grow (at least 4 pages).
         */
        if (addr < current->mm->start_stack &&
            addr > current->mm->start_stack - size - PAGE_SIZE * 5)
            goto invalid;
    }

    addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
    *raddr = addr;
    err = 0;
    if (IS_ERR_VALUE(addr))
        err = (long)addr;
invalid:
    up_write(&current->mm->mmap_sem);
    if (populate)
        mm_populate(addr, populate);

out_fput:
    fput(file);

out_nattch:
    down_write(&shm_ids(ns).rw_mutex);
    shp = shm_lock(ns, shmid);
    BUG_ON(IS_ERR(shp));
    shp->shm_nattch--;
    if (shm_may_destroy(ns, shp))
        shm_destroy(ns, shp);
    else
        shm_unlock(shp);
    up_write(&shm_ids(ns).rw_mutex);

out:
    return err;

out_unlock:
    shm_unlock(shp);
    goto out;

out_free:
    kfree(sfd);
out_put_dentry:
    path_put(&path);
    goto out_nattch;
}

SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
    unsigned long ret;
    long err;

    err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
    if (err)
        return err;
    force_successful_syscall_return();
    return (long)ret;
}
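
/*
 * Editorial aside: the usual userspace pairing for the syscall above --
 * attach at a kernel-chosen address and let the wrapper return it.
 * Illustrative only; "id" is assumed to come from a prior shmget():
 *
 *    #include <stdio.h>
 *    #include <string.h>
 *    #include <sys/shm.h>
 *
 *    int attach_and_write(int id)
 *    {
 *        void *p = shmat(id, NULL, 0);   // NULL: kernel picks the address
 *
 *        if (p == (void *)-1) {
 *            perror("shmat");
 *            return -1;
 *        }
 *        strcpy(p, "hello");             // segment is now ordinary memory
 *        return 0;
 *    }
 */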

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma;
    unsigned long addr = (unsigned long)shmaddr;
    int retval = -EINVAL;
#ifdef CONFIG_MMU
    loff_t size = 0;
    struct vm_area_struct *next;
#endif

    if (addr & ~PAGE_MASK)
        return retval;

    down_write(&mm->mmap_sem);

    /*
     * This function tries to be smart and unmap shm segments that
     * were modified by partial mlock or munmap calls:
     * - It first determines the size of the shm segment that should be
     *   unmapped: it searches for a vma that is backed by shm and that
     *   started at address shmaddr. It records its size and then unmaps
     *   it.
     * - Then it unmaps all shm vmas that started at shmaddr and that
     *   are within the initially determined size.
     * Errors from do_munmap are ignored: the function only fails if
     * it's called with invalid parameters or if it's called to unmap
     * a part of a vma. Both calls in this function are for full vmas,
     * the parameters are directly copied from the vma itself and always
     * valid - therefore do_munmap cannot fail. (famous last words?)
     */
    /*
     * If it had been mremap()'d, the starting address would not
     * match the usual checks anyway. So assume all vma's are
     * above the starting address given.
     */
    vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
    while (vma) {
        next = vma->vm_next;

        /*
         * Check if the starting address would match, i.e. it's
         * a fragment created by mprotect() and/or munmap(), or
         * otherwise it starts at this address with no hassles.
         */
        if ((vma->vm_ops == &shm_vm_ops) &&
            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

            size = file_inode(vma->vm_file)->i_size;
            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
            /*
             * We discovered the size of the shm segment, so
             * break out of here and fall through to the next
             * loop that uses the size information to stop
             * searching for matching vma's.
             */
            retval = 0;
            vma = next;
            break;
        }
        vma = next;
    }

    /*
     * We need look no further than the maximum address a fragment
     * could possibly have landed at. Also cast things to loff_t to
     * prevent overflows and make comparisons vs. equal-width types.
     */
    size = PAGE_ALIGN(size);
    while (vma && (loff_t)(vma->vm_end - addr) <= size) {
        next = vma->vm_next;

        /* finding a matching vma now does not alter retval */
        if ((vma->vm_ops == &shm_vm_ops) &&
            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);

        vma = next;
    }

#else /* CONFIG_MMU */
    /*
     * Under NOMMU conditions, the exact address to be destroyed must
     * be given.
     */
    retval = -EINVAL;
    if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
        retval = 0;
    }

#endif

    up_write(&mm->mmap_sem);
    return retval;
}
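
/*
 * Editorial aside: detaching is the userspace mirror of shm_close() above;
 * with IPC_RMID already requested (or shm_rmid_forced set), the last shmdt()
 * is what actually frees the segment. A hedged sketch of the full lifecycle:
 *
 *    #include <sys/ipc.h>
 *    #include <sys/shm.h>
 *
 *    int lifecycle(void)
 *    {
 *        int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *        void *p;
 *
 *        if (id < 0)
 *            return -1;
 *        p = shmat(id, NULL, 0);
 *        if (p == (void *)-1)
 *            return -1;
 *        shmctl(id, IPC_RMID, NULL);  // marks SHM_DEST; nattch is still 1
 *        return shmdt(p);             // nattch drops to 0 -> shm_destroy()
 *    }
 */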

#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
    struct user_namespace *user_ns = seq_user_ns(s);
    struct shmid_kernel *shp = it;
    unsigned long rss = 0, swp = 0;

    shm_add_rss_swap(shp, &rss, &swp);

#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

    return seq_printf(s,
              "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
              "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
              SIZE_SPEC " " SIZE_SPEC "\n",
              shp->shm_perm.key,
              shp->shm_perm.id,
              shp->shm_perm.mode,
              shp->shm_segsz,
              shp->shm_cprid,
              shp->shm_lprid,
              shp->shm_nattch,
              from_kuid_munged(user_ns, shp->shm_perm.uid),
              from_kgid_munged(user_ns, shp->shm_perm.gid),
              from_kuid_munged(user_ns, shp->shm_perm.cuid),
              from_kgid_munged(user_ns, shp->shm_perm.cgid),
              shp->shm_atim,
              shp->shm_dtim,
              shp->shm_ctim,
              rss * PAGE_SIZE,
              swp * PAGE_SIZE);
}
#endif
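
/*
 * Editorial aside: the seq_file handler above backs /proc/sysvipc/shm, one
 * line per segment under the header registered in shm_init(). A minimal
 * userspace sketch that dumps it (illustrative only):
 *
 *    #include <stdio.h>
 *
 *    int main(void)
 *    {
 *        char line[512];
 *        FILE *f = fopen("/proc/sysvipc/shm", "r");
 *
 *        if (!f)
 *            return 1;
 *        while (fgets(line, sizeof(line), f))
 *            fputs(line, stdout);
 *        fclose(f);
 *        return 0;
 *    }
 */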