Root/ipc/shm.c

1/*
2 * linux/ipc/shm.c
3 * Copyright (C) 1992, 1993 Krishna Balasubramanian
4 * Many improvements/fixes by Bruno Haible.
5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7 *
8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15 *
16 * support for audit of ipc object properties and permission changes
17 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
18 *
19 * namespaces support
20 * OpenVZ, SWsoft Inc.
21 * Pavel Emelianov <xemul@openvz.org>
22 */
23
24#include <linux/slab.h>
25#include <linux/mm.h>
26#include <linux/hugetlb.h>
27#include <linux/shm.h>
28#include <linux/init.h>
29#include <linux/file.h>
30#include <linux/mman.h>
31#include <linux/shmem_fs.h>
32#include <linux/security.h>
33#include <linux/syscalls.h>
34#include <linux/audit.h>
35#include <linux/capability.h>
36#include <linux/ptrace.h>
37#include <linux/seq_file.h>
38#include <linux/rwsem.h>
39#include <linux/nsproxy.h>
40#include <linux/mount.h>
41#include <linux/ipc_namespace.h>
42
43#include <asm/uaccess.h>
44
45#include "util.h"
46
/*
 * Private data of the wrapper file that do_shmat() allocates for every
 * attach; it is stored in file->private_data and freed by shm_release().
 */
47struct shm_file_data {
48    int id;                     /* shm_perm.id of the attached segment */
49    struct ipc_namespace *ns;   /* namespace reference taken with get_ipc_ns() */
50    struct file *file;          /* the segment's own backing file (shp->shm_file) */
51    const struct vm_operations_struct *vm_ops; /* backing vm_ops, saved by shm_mmap() */
52};
53
/* Typed view of file->private_data; expands to an lvalue, so it can be assigned. */
54#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
55
/* Tentative declarations; both tables are defined further down in this file. */
56static const struct file_operations shm_file_operations;
57static const struct vm_operations_struct shm_vm_ops;
58
/* The IPC_SHM_IDS slot of a namespace's ids[] array. */
59#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS])
60
/* Unlock a segment by handing its embedded shm_perm to ipc_unlock(). */
61#define shm_unlock(shp) \
62    ipc_unlock(&(shp)->shm_perm)
63
/* Forward declarations of file-local functions used before their definition. */
64static int newseg(struct ipc_namespace *, struct ipc_params *);
65static void shm_open(struct vm_area_struct *vma);
66static void shm_close(struct vm_area_struct *vma);
67static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
/* The /proc show routine is only declared when procfs is configured. */
68#ifdef CONFIG_PROC_FS
69static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
70#endif
71
72void shm_init_ns(struct ipc_namespace *ns)
73{
74    ns->shm_ctlmax = SHMMAX;
75    ns->shm_ctlall = SHMALL;
76    ns->shm_ctlmni = SHMMNI;
77    ns->shm_rmid_forced = 0;
78    ns->shm_tot = 0;
79    ipc_init_ids(&shm_ids(ns));
80}
81
82/*
83 * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
84 * Only shm_ids.rw_mutex remains locked on exit.
85 */
86static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
87{
88    struct shmid_kernel *shp;
89    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
90
91    if (shp->shm_nattch){
92        shp->shm_perm.mode |= SHM_DEST;
93        /* Do not find it any more */
94        shp->shm_perm.key = IPC_PRIVATE;
95        shm_unlock(shp);
96    } else
97        shm_destroy(ns, shp);
98}
99
#ifdef CONFIG_IPC_NS
/*
 * shm_exit_ns - release the shm state of a namespace
 * @ns: namespace being torn down
 *
 * Runs do_shm_rmid() over every segment through free_ipcs(), then destroys
 * the idr that indexed them.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
    free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
    idr_destroy(&shm_ids(ns).ipcs_idr);
}
#endif
107
108static int __init ipc_ns_init(void)
109{
110    shm_init_ns(&init_ipc_ns);
111    return 0;
112}
113
114pure_initcall(ipc_ns_init);
115
/*
 * shm_init - register the "sysvipc/shm" proc interface
 *
 * Hands ipc_init_proc_interface() the column header line, the shm id-set
 * index and the per-entry show routine.
 *
 * NOTE(review): in this copy the 32-bit and 64-bit header strings are
 * identical, so the BITS_PER_LONG conditional selects the same text either
 * way. Presumably the column padding originally differed; confirm against
 * a pristine copy before relying on the header layout.
 */
116void __init shm_init (void)
117{
118    ipc_init_proc_interface("sysvipc/shm",
119#if BITS_PER_LONG <= 32
120                " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
121#else
122                " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
123#endif
124                IPC_SHM_IDS, sysvipc_shm_proc_show);
125}
126
127/*
128 * shm_lock_(check_) routines are called in the paths where the rw_mutex
129 * is not necessarily held.
130 */
131static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
132{
133    struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
134
135    if (IS_ERR(ipcp))
136        return (struct shmid_kernel *)ipcp;
137
138    return container_of(ipcp, struct shmid_kernel, shm_perm);
139}
140
141static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
142{
143    rcu_read_lock();
144    spin_lock(&ipcp->shm_perm.lock);
145}
146
147static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
148                        int id)
149{
150    struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
151
152    if (IS_ERR(ipcp))
153        return (struct shmid_kernel *)ipcp;
154
155    return container_of(ipcp, struct shmid_kernel, shm_perm);
156}
157
158static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
159{
160    ipc_rmid(&shm_ids(ns), &s->shm_perm);
161}
162
163
164/* This is called by fork, once for every shm attach. */
165static void shm_open(struct vm_area_struct *vma)
166{
167    struct file *file = vma->vm_file;
168    struct shm_file_data *sfd = shm_file_data(file);
169    struct shmid_kernel *shp;
170
171    shp = shm_lock(sfd->ns, sfd->id);
172    BUG_ON(IS_ERR(shp));
173    shp->shm_atim = get_seconds();
174    shp->shm_lprid = task_tgid_vnr(current);
175    shp->shm_nattch++;
176    shm_unlock(shp);
177}
178
179/*
180 * shm_destroy - free the struct shmid_kernel
181 *
182 * @ns: namespace
183 * @shp: struct to free
184 *
185 * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
186 * but returns with shp unlocked and freed.
187 */
188static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
189{
190    ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
191    shm_rmid(ns, shp);
192    shm_unlock(shp);
193    if (!is_file_hugepages(shp->shm_file))
194        shmem_lock(shp->shm_file, 0, shp->mlock_user);
195    else if (shp->mlock_user)
196        user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
197                        shp->mlock_user);
198    fput (shp->shm_file);
199    security_shm_free(shp);
200    ipc_rcu_putref(shp);
201}
202
203/*
204 * shm_may_destroy - identifies whether shm segment should be destroyed now
205 *
206 * Returns true if and only if there are no active users of the segment and
207 * one of the following is true:
208 *
209 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
210 *
211 * 2) sysctl kernel.shm_rmid_forced is set to 1.
212 */
213static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
214{
215    return (shp->shm_nattch == 0) &&
216           (ns->shm_rmid_forced ||
217        (shp->shm_perm.mode & SHM_DEST));
218}
219
220/*
221 * remove the attach descriptor vma.
222 * free memory for segment if it is marked destroyed.
223 * The descriptor has already been removed from the current->mm->mmap list
224 * and will later be kfree()d.
225 */
226static void shm_close(struct vm_area_struct *vma)
227{
228    struct file * file = vma->vm_file;
229    struct shm_file_data *sfd = shm_file_data(file);
230    struct shmid_kernel *shp;
231    struct ipc_namespace *ns = sfd->ns;
232
233    down_write(&shm_ids(ns).rw_mutex);
234    /* remove from the list of attaches of the shm segment */
235    shp = shm_lock(ns, sfd->id);
236    BUG_ON(IS_ERR(shp));
237    shp->shm_lprid = task_tgid_vnr(current);
238    shp->shm_dtim = get_seconds();
239    shp->shm_nattch--;
240    if (shm_may_destroy(ns, shp))
241        shm_destroy(ns, shp);
242    else
243        shm_unlock(shp);
244    up_write(&shm_ids(ns).rw_mutex);
245}
246
247/* Called with ns->shm_ids(ns).rw_mutex locked */
248static int shm_try_destroy_current(int id, void *p, void *data)
249{
250    struct ipc_namespace *ns = data;
251    struct kern_ipc_perm *ipcp = p;
252    struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
253
254    if (shp->shm_creator != current)
255        return 0;
256
257    /*
258     * Mark it as orphaned to destroy the segment when
259     * kernel.shm_rmid_forced is changed.
260     * It is noop if the following shm_may_destroy() returns true.
261     */
262    shp->shm_creator = NULL;
263
264    /*
265     * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
266     * is not set, it shouldn't be deleted here.
267     */
268    if (!ns->shm_rmid_forced)
269        return 0;
270
271    if (shm_may_destroy(ns, shp)) {
272        shm_lock_by_ptr(shp);
273        shm_destroy(ns, shp);
274    }
275    return 0;
276}
277
278/* Called with ns->shm_ids(ns).rw_mutex locked */
279static int shm_try_destroy_orphaned(int id, void *p, void *data)
280{
281    struct ipc_namespace *ns = data;
282    struct kern_ipc_perm *ipcp = p;
283    struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
284
285    /*
286     * We want to destroy segments without users and with already
287     * exit'ed originating process.
288     *
289     * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
290     */
291    if (shp->shm_creator != NULL)
292        return 0;
293
294    if (shm_may_destroy(ns, shp)) {
295        shm_lock_by_ptr(shp);
296        shm_destroy(ns, shp);
297    }
298    return 0;
299}
300
301void shm_destroy_orphaned(struct ipc_namespace *ns)
302{
303    down_write(&shm_ids(ns).rw_mutex);
304    if (shm_ids(ns).in_use)
305        idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
306    up_write(&shm_ids(ns).rw_mutex);
307}
308
309
310void exit_shm(struct task_struct *task)
311{
312    struct ipc_namespace *ns = task->nsproxy->ipc_ns;
313
314    if (shm_ids(ns).in_use == 0)
315        return;
316
317    /* Destroy all already created segments, but not mapped yet */
318    down_write(&shm_ids(ns).rw_mutex);
319    if (shm_ids(ns).in_use)
320        idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
321    up_write(&shm_ids(ns).rw_mutex);
322}
323
324static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
325{
326    struct file *file = vma->vm_file;
327    struct shm_file_data *sfd = shm_file_data(file);
328
329    return sfd->vm_ops->fault(vma, vmf);
330}
331
332#ifdef CONFIG_NUMA
333static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
334{
335    struct file *file = vma->vm_file;
336    struct shm_file_data *sfd = shm_file_data(file);
337    int err = 0;
338    if (sfd->vm_ops->set_policy)
339        err = sfd->vm_ops->set_policy(vma, new);
340    return err;
341}
342
343static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
344                    unsigned long addr)
345{
346    struct file *file = vma->vm_file;
347    struct shm_file_data *sfd = shm_file_data(file);
348    struct mempolicy *pol = NULL;
349
350    if (sfd->vm_ops->get_policy)
351        pol = sfd->vm_ops->get_policy(vma, addr);
352    else if (vma->vm_policy)
353        pol = vma->vm_policy;
354
355    return pol;
356}
357#endif
358
359static int shm_mmap(struct file * file, struct vm_area_struct * vma)
360{
361    struct shm_file_data *sfd = shm_file_data(file);
362    int ret;
363
364    ret = sfd->file->f_op->mmap(sfd->file, vma);
365    if (ret != 0)
366        return ret;
367    sfd->vm_ops = vma->vm_ops;
368#ifdef CONFIG_MMU
369    BUG_ON(!sfd->vm_ops->fault);
370#endif
371    vma->vm_ops = &shm_vm_ops;
372    shm_open(vma);
373
374    return ret;
375}
376
377static int shm_release(struct inode *ino, struct file *file)
378{
379    struct shm_file_data *sfd = shm_file_data(file);
380
381    put_ipc_ns(sfd->ns);
382    shm_file_data(file) = NULL;
383    kfree(sfd);
384    return 0;
385}
386
387static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
388{
389    struct shm_file_data *sfd = shm_file_data(file);
390
391    if (!sfd->file->f_op->fsync)
392        return -EINVAL;
393    return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
394}
395
396static long shm_fallocate(struct file *file, int mode, loff_t offset,
397              loff_t len)
398{
399    struct shm_file_data *sfd = shm_file_data(file);
400
401    if (!sfd->file->f_op->fallocate)
402        return -EOPNOTSUPP;
403    return sfd->file->f_op->fallocate(file, mode, offset, len);
404}
405
406static unsigned long shm_get_unmapped_area(struct file *file,
407    unsigned long addr, unsigned long len, unsigned long pgoff,
408    unsigned long flags)
409{
410    struct shm_file_data *sfd = shm_file_data(file);
411    return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
412                        pgoff, flags);
413}
414
/*
 * File operations of the per-attach wrapper file that do_shmat() creates
 * for segments whose backing file is not a hugepage file.
 * ->get_unmapped_area is only provided on !CONFIG_MMU builds.
 */
415static const struct file_operations shm_file_operations = {
416    .mmap = shm_mmap,
417    .fsync = shm_fsync,
418    .release = shm_release,
419#ifndef CONFIG_MMU
420    .get_unmapped_area = shm_get_unmapped_area,
421#endif
422    .llseek = noop_llseek,
423    .fallocate = shm_fallocate,
424};
425
/*
 * File operations of the per-attach wrapper file that do_shmat() creates
 * for hugepage-backed segments. ->get_unmapped_area is always provided
 * here, and the table's address is what is_file_shm_hugepages() tests for.
 */
426static const struct file_operations shm_file_operations_huge = {
427    .mmap = shm_mmap,
428    .fsync = shm_fsync,
429    .release = shm_release,
430    .get_unmapped_area = shm_get_unmapped_area,
431    .llseek = noop_llseek,
432    .fallocate = shm_fallocate,
433};
434
435int is_file_shm_hugepages(struct file *file)
436{
437    return file->f_op == &shm_file_operations_huge;
438}
439
/*
 * vm_ops that shm_mmap() installs on every shm mapping in place of the
 * backing file's own; fault and the NUMA policy hooks delegate to the
 * saved backing vm_ops.
 */
440static const struct vm_operations_struct shm_vm_ops = {
441    .open = shm_open, /* callback for a new vm-area open */
442    .close = shm_close, /* callback for when the vm-area is released */
443    .fault = shm_fault,
444#if defined(CONFIG_NUMA)
445    .set_policy = shm_set_policy,
446    .get_policy = shm_get_policy,
447#endif
448};
449
450/**
451 * newseg - Create a new shared memory segment
452 * @ns: namespace
453 * @params: ptr to the structure that contains key, size and shmflg
454 *
455 * Called with shm_ids.rw_mutex held as a writer.
456 */
457
458static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
459{
460    key_t key = params->key;
461    int shmflg = params->flg;
462    size_t size = params->u.size;
463    int error;
464    struct shmid_kernel *shp;
465    int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
466    struct file * file;
467    char name[13];
468    int id;
469    vm_flags_t acctflag = 0;
470
471    if (size < SHMMIN || size > ns->shm_ctlmax)
472        return -EINVAL;
473
474    if (ns->shm_tot + numpages > ns->shm_ctlall)
475        return -ENOSPC;
476
477    shp = ipc_rcu_alloc(sizeof(*shp));
478    if (!shp)
479        return -ENOMEM;
480
481    shp->shm_perm.key = key;
482    shp->shm_perm.mode = (shmflg & S_IRWXUGO);
483    shp->mlock_user = NULL;
484
485    shp->shm_perm.security = NULL;
486    error = security_shm_alloc(shp);
487    if (error) {
488        ipc_rcu_putref(shp);
489        return error;
490    }
491
492    sprintf (name, "SYSV%08x", key);
493    if (shmflg & SHM_HUGETLB) {
494        /* hugetlb_file_setup applies strict accounting */
495        if (shmflg & SHM_NORESERVE)
496            acctflag = VM_NORESERVE;
497        file = hugetlb_file_setup(name, 0, size, acctflag,
498                    &shp->mlock_user, HUGETLB_SHMFS_INODE);
499    } else {
500        /*
501         * Do not allow no accounting for OVERCOMMIT_NEVER, even
502          * if it's asked for.
503         */
504        if ((shmflg & SHM_NORESERVE) &&
505                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
506            acctflag = VM_NORESERVE;
507        file = shmem_file_setup(name, size, acctflag);
508    }
509    error = PTR_ERR(file);
510    if (IS_ERR(file))
511        goto no_file;
512
513    id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
514    if (id < 0) {
515        error = id;
516        goto no_id;
517    }
518
519    shp->shm_cprid = task_tgid_vnr(current);
520    shp->shm_lprid = 0;
521    shp->shm_atim = shp->shm_dtim = 0;
522    shp->shm_ctim = get_seconds();
523    shp->shm_segsz = size;
524    shp->shm_nattch = 0;
525    shp->shm_file = file;
526    shp->shm_creator = current;
527    /*
528     * shmid gets reported as "inode#" in /proc/pid/maps.
529     * proc-ps tools use this. Changing this will break them.
530     */
531    file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
532
533    ns->shm_tot += numpages;
534    error = shp->shm_perm.id;
535    shm_unlock(shp);
536    return error;
537
538no_id:
539    if (is_file_hugepages(file) && shp->mlock_user)
540        user_shm_unlock(size, shp->mlock_user);
541    fput(file);
542no_file:
543    security_shm_free(shp);
544    ipc_rcu_putref(shp);
545    return error;
546}
547
548/*
549 * Called with shm_ids.rw_mutex and ipcp locked.
550 */
551static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
552{
553    struct shmid_kernel *shp;
554
555    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
556    return security_shm_associate(shp, shmflg);
557}
558
559/*
560 * Called with shm_ids.rw_mutex and ipcp locked.
561 */
562static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
563                struct ipc_params *params)
564{
565    struct shmid_kernel *shp;
566
567    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
568    if (shp->shm_segsz < params->u.size)
569        return -EINVAL;
570
571    return 0;
572}
573
574SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
575{
576    struct ipc_namespace *ns;
577    struct ipc_ops shm_ops;
578    struct ipc_params shm_params;
579
580    ns = current->nsproxy->ipc_ns;
581
582    shm_ops.getnew = newseg;
583    shm_ops.associate = shm_security;
584    shm_ops.more_checks = shm_more_checks;
585
586    shm_params.key = key;
587    shm_params.flg = shmflg;
588    shm_params.u.size = size;
589
590    return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
591}
592
593static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
594{
595    switch(version) {
596    case IPC_64:
597        return copy_to_user(buf, in, sizeof(*in));
598    case IPC_OLD:
599        {
600        struct shmid_ds out;
601
602        memset(&out, 0, sizeof(out));
603        ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
604        out.shm_segsz = in->shm_segsz;
605        out.shm_atime = in->shm_atime;
606        out.shm_dtime = in->shm_dtime;
607        out.shm_ctime = in->shm_ctime;
608        out.shm_cpid = in->shm_cpid;
609        out.shm_lpid = in->shm_lpid;
610        out.shm_nattch = in->shm_nattch;
611
612        return copy_to_user(buf, &out, sizeof(out));
613        }
614    default:
615        return -EINVAL;
616    }
617}
618
619static inline unsigned long
620copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
621{
622    switch(version) {
623    case IPC_64:
624        if (copy_from_user(out, buf, sizeof(*out)))
625            return -EFAULT;
626        return 0;
627    case IPC_OLD:
628        {
629        struct shmid_ds tbuf_old;
630
631        if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
632            return -EFAULT;
633
634        out->shm_perm.uid = tbuf_old.shm_perm.uid;
635        out->shm_perm.gid = tbuf_old.shm_perm.gid;
636        out->shm_perm.mode = tbuf_old.shm_perm.mode;
637
638        return 0;
639        }
640    default:
641        return -EINVAL;
642    }
643}
644
645static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
646{
647    switch(version) {
648    case IPC_64:
649        return copy_to_user(buf, in, sizeof(*in));
650    case IPC_OLD:
651        {
652        struct shminfo out;
653
654        if(in->shmmax > INT_MAX)
655            out.shmmax = INT_MAX;
656        else
657            out.shmmax = (int)in->shmmax;
658
659        out.shmmin = in->shmmin;
660        out.shmmni = in->shmmni;
661        out.shmseg = in->shmseg;
662        out.shmall = in->shmall;
663
664        return copy_to_user(buf, &out, sizeof(out));
665        }
666    default:
667        return -EINVAL;
668    }
669}
670
671/*
672 * Calculate and add used RSS and swap pages of a shm.
673 * Called with shm_ids.rw_mutex held as a reader
674 */
675static void shm_add_rss_swap(struct shmid_kernel *shp,
676    unsigned long *rss_add, unsigned long *swp_add)
677{
678    struct inode *inode;
679
680    inode = shp->shm_file->f_path.dentry->d_inode;
681
682    if (is_file_hugepages(shp->shm_file)) {
683        struct address_space *mapping = inode->i_mapping;
684        struct hstate *h = hstate_file(shp->shm_file);
685        *rss_add += pages_per_huge_page(h) * mapping->nrpages;
686    } else {
687#ifdef CONFIG_SHMEM
688        struct shmem_inode_info *info = SHMEM_I(inode);
689        spin_lock(&info->lock);
690        *rss_add += inode->i_mapping->nrpages;
691        *swp_add += info->swapped;
692        spin_unlock(&info->lock);
693#else
694        *rss_add += inode->i_mapping->nrpages;
695#endif
696    }
697}
698
699/*
700 * Called with shm_ids.rw_mutex held as a reader
701 */
702static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
703        unsigned long *swp)
704{
705    int next_id;
706    int total, in_use;
707
708    *rss = 0;
709    *swp = 0;
710
711    in_use = shm_ids(ns).in_use;
712
713    for (total = 0, next_id = 0; total < in_use; next_id++) {
714        struct kern_ipc_perm *ipc;
715        struct shmid_kernel *shp;
716
717        ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
718        if (ipc == NULL)
719            continue;
720        shp = container_of(ipc, struct shmid_kernel, shm_perm);
721
722        shm_add_rss_swap(shp, rss, swp);
723
724        total++;
725    }
726}
727
728/*
729 * This function handles some shmctl commands which require the rw_mutex
730 * to be held in write mode.
731 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
732 */
733static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
734               struct shmid_ds __user *buf, int version)
735{
736    struct kern_ipc_perm *ipcp;
737    struct shmid64_ds shmid64;
738    struct shmid_kernel *shp;
739    int err;
740
741    if (cmd == IPC_SET) {
742        if (copy_shmid_from_user(&shmid64, buf, version))
743            return -EFAULT;
744    }
745
746    ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
747                   &shmid64.shm_perm, 0);
748    if (IS_ERR(ipcp))
749        return PTR_ERR(ipcp);
750
751    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
752
753    err = security_shm_shmctl(shp, cmd);
754    if (err)
755        goto out_unlock;
756    switch (cmd) {
757    case IPC_RMID:
758        do_shm_rmid(ns, ipcp);
759        goto out_up;
760    case IPC_SET:
761        ipc_update_perm(&shmid64.shm_perm, ipcp);
762        shp->shm_ctim = get_seconds();
763        break;
764    default:
765        err = -EINVAL;
766    }
767out_unlock:
768    shm_unlock(shp);
769out_up:
770    up_write(&shm_ids(ns).rw_mutex);
771    return err;
772}
773
774SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
775{
776    struct shmid_kernel *shp;
777    int err, version;
778    struct ipc_namespace *ns;
779
780    if (cmd < 0 || shmid < 0) {
781        err = -EINVAL;
782        goto out;
783    }
784
785    version = ipc_parse_version(&cmd);
786    ns = current->nsproxy->ipc_ns;
787
788    switch (cmd) { /* replace with proc interface ? */
789    case IPC_INFO:
790    {
791        struct shminfo64 shminfo;
792
793        err = security_shm_shmctl(NULL, cmd);
794        if (err)
795            return err;
796
797        memset(&shminfo, 0, sizeof(shminfo));
798        shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
799        shminfo.shmmax = ns->shm_ctlmax;
800        shminfo.shmall = ns->shm_ctlall;
801
802        shminfo.shmmin = SHMMIN;
803        if(copy_shminfo_to_user (buf, &shminfo, version))
804            return -EFAULT;
805
806        down_read(&shm_ids(ns).rw_mutex);
807        err = ipc_get_maxid(&shm_ids(ns));
808        up_read(&shm_ids(ns).rw_mutex);
809
810        if(err<0)
811            err = 0;
812        goto out;
813    }
814    case SHM_INFO:
815    {
816        struct shm_info shm_info;
817
818        err = security_shm_shmctl(NULL, cmd);
819        if (err)
820            return err;
821
822        memset(&shm_info, 0, sizeof(shm_info));
823        down_read(&shm_ids(ns).rw_mutex);
824        shm_info.used_ids = shm_ids(ns).in_use;
825        shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
826        shm_info.shm_tot = ns->shm_tot;
827        shm_info.swap_attempts = 0;
828        shm_info.swap_successes = 0;
829        err = ipc_get_maxid(&shm_ids(ns));
830        up_read(&shm_ids(ns).rw_mutex);
831        if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
832            err = -EFAULT;
833            goto out;
834        }
835
836        err = err < 0 ? 0 : err;
837        goto out;
838    }
839    case SHM_STAT:
840    case IPC_STAT:
841    {
842        struct shmid64_ds tbuf;
843        int result;
844
845        if (cmd == SHM_STAT) {
846            shp = shm_lock(ns, shmid);
847            if (IS_ERR(shp)) {
848                err = PTR_ERR(shp);
849                goto out;
850            }
851            result = shp->shm_perm.id;
852        } else {
853            shp = shm_lock_check(ns, shmid);
854            if (IS_ERR(shp)) {
855                err = PTR_ERR(shp);
856                goto out;
857            }
858            result = 0;
859        }
860        err = -EACCES;
861        if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
862            goto out_unlock;
863        err = security_shm_shmctl(shp, cmd);
864        if (err)
865            goto out_unlock;
866        memset(&tbuf, 0, sizeof(tbuf));
867        kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
868        tbuf.shm_segsz = shp->shm_segsz;
869        tbuf.shm_atime = shp->shm_atim;
870        tbuf.shm_dtime = shp->shm_dtim;
871        tbuf.shm_ctime = shp->shm_ctim;
872        tbuf.shm_cpid = shp->shm_cprid;
873        tbuf.shm_lpid = shp->shm_lprid;
874        tbuf.shm_nattch = shp->shm_nattch;
875        shm_unlock(shp);
876        if(copy_shmid_to_user (buf, &tbuf, version))
877            err = -EFAULT;
878        else
879            err = result;
880        goto out;
881    }
882    case SHM_LOCK:
883    case SHM_UNLOCK:
884    {
885        struct file *shm_file;
886
887        shp = shm_lock_check(ns, shmid);
888        if (IS_ERR(shp)) {
889            err = PTR_ERR(shp);
890            goto out;
891        }
892
893        audit_ipc_obj(&(shp->shm_perm));
894
895        if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
896            uid_t euid = current_euid();
897            err = -EPERM;
898            if (euid != shp->shm_perm.uid &&
899                euid != shp->shm_perm.cuid)
900                goto out_unlock;
901            if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
902                goto out_unlock;
903        }
904
905        err = security_shm_shmctl(shp, cmd);
906        if (err)
907            goto out_unlock;
908
909        shm_file = shp->shm_file;
910        if (is_file_hugepages(shm_file))
911            goto out_unlock;
912
913        if (cmd == SHM_LOCK) {
914            struct user_struct *user = current_user();
915            err = shmem_lock(shm_file, 1, user);
916            if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
917                shp->shm_perm.mode |= SHM_LOCKED;
918                shp->mlock_user = user;
919            }
920            goto out_unlock;
921        }
922
923        /* SHM_UNLOCK */
924        if (!(shp->shm_perm.mode & SHM_LOCKED))
925            goto out_unlock;
926        shmem_lock(shm_file, 0, shp->mlock_user);
927        shp->shm_perm.mode &= ~SHM_LOCKED;
928        shp->mlock_user = NULL;
929        get_file(shm_file);
930        shm_unlock(shp);
931        shmem_unlock_mapping(shm_file->f_mapping);
932        fput(shm_file);
933        goto out;
934    }
935    case IPC_RMID:
936    case IPC_SET:
937        err = shmctl_down(ns, shmid, cmd, buf, version);
938        return err;
939    default:
940        return -EINVAL;
941    }
942
943out_unlock:
944    shm_unlock(shp);
945out:
946    return err;
947}
948
949/*
950 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
951 *
952 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
953 * "raddr" thing points to kernel space, and there has to be a wrapper around
954 * this.
955 */
956long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
957          unsigned long shmlba)
958{
959    struct shmid_kernel *shp;
960    unsigned long addr;
961    unsigned long size;
962    struct file * file;
963    int err;
964    unsigned long flags;
965    unsigned long prot;
966    int acc_mode;
967    unsigned long user_addr;
968    struct ipc_namespace *ns;
969    struct shm_file_data *sfd;
970    struct path path;
971    fmode_t f_mode;
972
973    err = -EINVAL;
974    if (shmid < 0)
975        goto out;
976    else if ((addr = (ulong)shmaddr)) {
977        if (addr & (shmlba - 1)) {
978            if (shmflg & SHM_RND)
979                addr &= ~(shmlba - 1); /* round down */
980            else
981#ifndef __ARCH_FORCE_SHMLBA
982                if (addr & ~PAGE_MASK)
983#endif
984                    goto out;
985        }
986        flags = MAP_SHARED | MAP_FIXED;
987    } else {
988        if ((shmflg & SHM_REMAP))
989            goto out;
990
991        flags = MAP_SHARED;
992    }
993
994    if (shmflg & SHM_RDONLY) {
995        prot = PROT_READ;
996        acc_mode = S_IRUGO;
997        f_mode = FMODE_READ;
998    } else {
999        prot = PROT_READ | PROT_WRITE;
1000        acc_mode = S_IRUGO | S_IWUGO;
1001        f_mode = FMODE_READ | FMODE_WRITE;
1002    }
1003    if (shmflg & SHM_EXEC) {
1004        prot |= PROT_EXEC;
1005        acc_mode |= S_IXUGO;
1006    }
1007
1008    /*
1009     * We cannot rely on the fs check since SYSV IPC does have an
1010     * additional creator id...
1011     */
1012    ns = current->nsproxy->ipc_ns;
1013    shp = shm_lock_check(ns, shmid);
1014    if (IS_ERR(shp)) {
1015        err = PTR_ERR(shp);
1016        goto out;
1017    }
1018
1019    err = -EACCES;
1020    if (ipcperms(ns, &shp->shm_perm, acc_mode))
1021        goto out_unlock;
1022
1023    err = security_shm_shmat(shp, shmaddr, shmflg);
1024    if (err)
1025        goto out_unlock;
1026
1027    path = shp->shm_file->f_path;
1028    path_get(&path);
1029    shp->shm_nattch++;
1030    size = i_size_read(path.dentry->d_inode);
1031    shm_unlock(shp);
1032
1033    err = -ENOMEM;
1034    sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1035    if (!sfd)
1036        goto out_put_dentry;
1037
1038    file = alloc_file(&path, f_mode,
1039              is_file_hugepages(shp->shm_file) ?
1040                &shm_file_operations_huge :
1041                &shm_file_operations);
1042    if (!file)
1043        goto out_free;
1044
1045    file->private_data = sfd;
1046    file->f_mapping = shp->shm_file->f_mapping;
1047    sfd->id = shp->shm_perm.id;
1048    sfd->ns = get_ipc_ns(ns);
1049    sfd->file = shp->shm_file;
1050    sfd->vm_ops = NULL;
1051
1052    err = security_mmap_file(file, prot, flags);
1053    if (err)
1054        goto out_fput;
1055
1056    down_write(&current->mm->mmap_sem);
1057    if (addr && !(shmflg & SHM_REMAP)) {
1058        err = -EINVAL;
1059        if (find_vma_intersection(current->mm, addr, addr + size))
1060            goto invalid;
1061        /*
1062         * If shm segment goes below stack, make sure there is some
1063         * space left for the stack to grow (at least 4 pages).
1064         */
1065        if (addr < current->mm->start_stack &&
1066            addr > current->mm->start_stack - size - PAGE_SIZE * 5)
1067            goto invalid;
1068    }
1069        
1070    user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0);
1071    *raddr = user_addr;
1072    err = 0;
1073    if (IS_ERR_VALUE(user_addr))
1074        err = (long)user_addr;
1075invalid:
1076    up_write(&current->mm->mmap_sem);
1077
1078out_fput:
1079    fput(file);
1080
1081out_nattch:
1082    down_write(&shm_ids(ns).rw_mutex);
1083    shp = shm_lock(ns, shmid);
1084    BUG_ON(IS_ERR(shp));
1085    shp->shm_nattch--;
1086    if (shm_may_destroy(ns, shp))
1087        shm_destroy(ns, shp);
1088    else
1089        shm_unlock(shp);
1090    up_write(&shm_ids(ns).rw_mutex);
1091
1092out:
1093    return err;
1094
1095out_unlock:
1096    shm_unlock(shp);
1097    goto out;
1098
1099out_free:
1100    kfree(sfd);
1101out_put_dentry:
1102    path_put(&path);
1103    goto out_nattch;
1104}
1105
1106SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1107{
1108    unsigned long ret;
1109    long err;
1110
1111    err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1112    if (err)
1113        return err;
1114    force_successful_syscall_return();
1115    return (long)ret;
1116}
1117
1118/*
1119 * detach and kill segment if marked destroyed.
1120 * The work is done in shm_close.
1121 */
1122SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1123{
1124    struct mm_struct *mm = current->mm;
1125    struct vm_area_struct *vma;
1126    unsigned long addr = (unsigned long)shmaddr;
1127    int retval = -EINVAL;
1128#ifdef CONFIG_MMU
1129    loff_t size = 0;
1130    struct vm_area_struct *next;
1131#endif
1132
1133    if (addr & ~PAGE_MASK)
1134        return retval;
1135
1136    down_write(&mm->mmap_sem);
1137
1138    /*
1139     * This function tries to be smart and unmap shm segments that
1140     * were modified by partial mlock or munmap calls:
1141     * - It first determines the size of the shm segment that should be
1142     * unmapped: It searches for a vma that is backed by shm and that
1143     * started at address shmaddr. It records it's size and then unmaps
1144     * it.
1145     * - Then it unmaps all shm vmas that started at shmaddr and that
1146     * are within the initially determined size.
1147     * Errors from do_munmap are ignored: the function only fails if
1148     * it's called with invalid parameters or if it's called to unmap
1149     * a part of a vma. Both calls in this function are for full vmas,
1150     * the parameters are directly copied from the vma itself and always
1151     * valid - therefore do_munmap cannot fail. (famous last words?)
1152     */
1153    /*
1154     * If it had been mremap()'d, the starting address would not
1155     * match the usual checks anyway. So assume all vma's are
1156     * above the starting address given.
1157     */
1158    vma = find_vma(mm, addr);
1159
1160#ifdef CONFIG_MMU
1161    while (vma) {
1162        next = vma->vm_next;
1163
1164        /*
1165         * Check if the starting address would match, i.e. it's
1166         * a fragment created by mprotect() and/or munmap(), or it
1167         * otherwise it starts at this address with no hassles.
1168         */
1169        if ((vma->vm_ops == &shm_vm_ops) &&
1170            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1171
1172
1173            size = vma->vm_file->f_path.dentry->d_inode->i_size;
1174            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1175            /*
1176             * We discovered the size of the shm segment, so
1177             * break out of here and fall through to the next
1178             * loop that uses the size information to stop
1179             * searching for matching vma's.
1180             */
1181            retval = 0;
1182            vma = next;
1183            break;
1184        }
1185        vma = next;
1186    }
1187
1188    /*
1189     * We need look no further than the maximum address a fragment
1190     * could possibly have landed at. Also cast things to loff_t to
1191     * prevent overflows and make comparisons vs. equal-width types.
1192     */
1193    size = PAGE_ALIGN(size);
1194    while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1195        next = vma->vm_next;
1196
1197        /* finding a matching vma now does not alter retval */
1198        if ((vma->vm_ops == &shm_vm_ops) &&
1199            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
1200
1201            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1202        vma = next;
1203    }
1204
1205#else /* CONFIG_MMU */
1206    /* under NOMMU conditions, the exact address to be destroyed must be
1207     * given */
1208    retval = -EINVAL;
1209    if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1210        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1211        retval = 0;
1212    }
1213
1214#endif
1215
1216    up_write(&mm->mmap_sem);
1217    return retval;
1218}
1219
#ifdef CONFIG_PROC_FS
/* Field width used for the byte-sized columns; wider when long is wider. */
#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

/*
 * seq_file show callback: print one formatted line for the shm segment
 * passed in through 'it'.
 *
 * The line holds, in order: key, id, mode (octal), segment size, creator and
 * last-operation pids, attach count, uid/gid, cuid/cgid, the three
 * timestamps, and finally two byte counts derived from the page counts that
 * shm_add_rss_swap() reports. The return value is whatever seq_printf()
 * returns.
 */
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
    struct shmid_kernel *seg = it;
    unsigned long rss_pages = 0;
    unsigned long swap_pages = 0;
    unsigned long rss_bytes;
    unsigned long swap_bytes;

    /* NOTE(review): presumably resident and swapped-out pages - confirm. */
    shm_add_rss_swap(seg, &rss_pages, &swap_pages);

    /* The counters are in pages; the listing shows bytes. */
    rss_bytes = rss_pages * PAGE_SIZE;
    swap_bytes = swap_pages * PAGE_SIZE;

    return seq_printf(s,
              "%10d %10d %4o " SIZE_SPEC " %5u %5u "
              "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
              SIZE_SPEC " " SIZE_SPEC "\n",
              seg->shm_perm.key,
              seg->shm_perm.id,
              seg->shm_perm.mode,
              seg->shm_segsz,
              seg->shm_cprid,
              seg->shm_lprid,
              seg->shm_nattch,
              seg->shm_perm.uid,
              seg->shm_perm.gid,
              seg->shm_perm.cuid,
              seg->shm_perm.cgid,
              seg->shm_atim,
              seg->shm_dtim,
              seg->shm_ctim,
              rss_bytes,
              swap_bytes);
}
#endif
1256

Archive Download this file



interactive