Root/ipc/shm.c

1/*
2 * linux/ipc/shm.c
3 * Copyright (C) 1992, 1993 Krishna Balasubramanian
4 * Many improvements/fixes by Bruno Haible.
5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7 *
8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15 *
16 * support for audit of ipc object properties and permission changes
17 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
18 *
19 * namespaces support
20 * OpenVZ, SWsoft Inc.
21 * Pavel Emelianov <xemul@openvz.org>
22 */
23
24#include <linux/slab.h>
25#include <linux/mm.h>
26#include <linux/hugetlb.h>
27#include <linux/shm.h>
28#include <linux/init.h>
29#include <linux/file.h>
30#include <linux/mman.h>
31#include <linux/shmem_fs.h>
32#include <linux/security.h>
33#include <linux/syscalls.h>
34#include <linux/audit.h>
35#include <linux/capability.h>
36#include <linux/ptrace.h>
37#include <linux/seq_file.h>
38#include <linux/rwsem.h>
39#include <linux/nsproxy.h>
40#include <linux/mount.h>
41#include <linux/ipc_namespace.h>
42
43#include <asm/uaccess.h>
44
45#include "util.h"
46
/*
 * Per-attach bookkeeping, stored in ->private_data of the file that
 * do_shmat() allocates for each attach.
 */
struct shm_file_data {
    /* id of the attached segment (copied from shp->shm_perm.id) */
    int id;
    /* IPC namespace the segment lives in; reference dropped in shm_release() */
    struct ipc_namespace *ns;
    /* the segment's backing file (shp->shm_file) */
    struct file *file;
    /* vm_ops installed by the backing file's ->mmap, saved by shm_mmap() */
    const struct vm_operations_struct *vm_ops;
};
53
/*
 * Access filp->private_data as a struct shm_file_data pointer.
 * The expansion is an lvalue, so it can be assigned to as well as read.
 */
#define shm_file_data(filp) \
    (*((struct shm_file_data **)&(filp)->private_data))
55
56static const struct file_operations shm_file_operations;
57static const struct vm_operations_struct shm_vm_ops;
58
/* The shared-memory id table of IPC namespace @ns. */
#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS])

/* Release the lock taken on segment @shp via its embedded shm_perm. */
#define shm_unlock(shp) ipc_unlock(&(shp)->shm_perm)
63
/* Prototypes for helpers that are referenced before their definitions. */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
71
72void shm_init_ns(struct ipc_namespace *ns)
73{
74    ns->shm_ctlmax = SHMMAX;
75    ns->shm_ctlall = SHMALL;
76    ns->shm_ctlmni = SHMMNI;
77    ns->shm_tot = 0;
78    ipc_init_ids(&shm_ids(ns));
79}
80
81/*
82 * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
83 * Only shm_ids.rw_mutex remains locked on exit.
84 */
85static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
86{
87    struct shmid_kernel *shp;
88    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
89
90    if (shp->shm_nattch){
91        shp->shm_perm.mode |= SHM_DEST;
92        /* Do not find it any more */
93        shp->shm_perm.key = IPC_PRIVATE;
94        shm_unlock(shp);
95    } else
96        shm_destroy(ns, shp);
97}
98
99#ifdef CONFIG_IPC_NS
100void shm_exit_ns(struct ipc_namespace *ns)
101{
102    free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
103    idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
104}
105#endif
106
107void __init shm_init (void)
108{
109    shm_init_ns(&init_ipc_ns);
110    ipc_init_proc_interface("sysvipc/shm",
111                " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n",
112                IPC_SHM_IDS, sysvipc_shm_proc_show);
113}
114
115/*
116 * shm_lock_(check_) routines are called in the paths where the rw_mutex
117 * is not necessarily held.
118 */
119static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
120{
121    struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
122
123    if (IS_ERR(ipcp))
124        return (struct shmid_kernel *)ipcp;
125
126    return container_of(ipcp, struct shmid_kernel, shm_perm);
127}
128
129static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
130                        int id)
131{
132    struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
133
134    if (IS_ERR(ipcp))
135        return (struct shmid_kernel *)ipcp;
136
137    return container_of(ipcp, struct shmid_kernel, shm_perm);
138}
139
140static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
141{
142    ipc_rmid(&shm_ids(ns), &s->shm_perm);
143}
144
145
146/* This is called by fork, once for every shm attach. */
147static void shm_open(struct vm_area_struct *vma)
148{
149    struct file *file = vma->vm_file;
150    struct shm_file_data *sfd = shm_file_data(file);
151    struct shmid_kernel *shp;
152
153    shp = shm_lock(sfd->ns, sfd->id);
154    BUG_ON(IS_ERR(shp));
155    shp->shm_atim = get_seconds();
156    shp->shm_lprid = task_tgid_vnr(current);
157    shp->shm_nattch++;
158    shm_unlock(shp);
159}
160
161/*
162 * shm_destroy - free the struct shmid_kernel
163 *
164 * @ns: namespace
165 * @shp: struct to free
166 *
167 * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
168 * but returns with shp unlocked and freed.
169 */
170static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
171{
172    ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
173    shm_rmid(ns, shp);
174    shm_unlock(shp);
175    if (!is_file_hugepages(shp->shm_file))
176        shmem_lock(shp->shm_file, 0, shp->mlock_user);
177    else if (shp->mlock_user)
178        user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
179                        shp->mlock_user);
180    fput (shp->shm_file);
181    security_shm_free(shp);
182    ipc_rcu_putref(shp);
183}
184
185/*
186 * remove the attach descriptor vma.
187 * free memory for segment if it is marked destroyed.
188 * The descriptor has already been removed from the current->mm->mmap list
189 * and will later be kfree()d.
190 */
191static void shm_close(struct vm_area_struct *vma)
192{
193    struct file * file = vma->vm_file;
194    struct shm_file_data *sfd = shm_file_data(file);
195    struct shmid_kernel *shp;
196    struct ipc_namespace *ns = sfd->ns;
197
198    down_write(&shm_ids(ns).rw_mutex);
199    /* remove from the list of attaches of the shm segment */
200    shp = shm_lock(ns, sfd->id);
201    BUG_ON(IS_ERR(shp));
202    shp->shm_lprid = task_tgid_vnr(current);
203    shp->shm_dtim = get_seconds();
204    shp->shm_nattch--;
205    if(shp->shm_nattch == 0 &&
206       shp->shm_perm.mode & SHM_DEST)
207        shm_destroy(ns, shp);
208    else
209        shm_unlock(shp);
210    up_write(&shm_ids(ns).rw_mutex);
211}
212
213static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
214{
215    struct file *file = vma->vm_file;
216    struct shm_file_data *sfd = shm_file_data(file);
217
218    return sfd->vm_ops->fault(vma, vmf);
219}
220
221#ifdef CONFIG_NUMA
222static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
223{
224    struct file *file = vma->vm_file;
225    struct shm_file_data *sfd = shm_file_data(file);
226    int err = 0;
227    if (sfd->vm_ops->set_policy)
228        err = sfd->vm_ops->set_policy(vma, new);
229    return err;
230}
231
232static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
233                    unsigned long addr)
234{
235    struct file *file = vma->vm_file;
236    struct shm_file_data *sfd = shm_file_data(file);
237    struct mempolicy *pol = NULL;
238
239    if (sfd->vm_ops->get_policy)
240        pol = sfd->vm_ops->get_policy(vma, addr);
241    else if (vma->vm_policy)
242        pol = vma->vm_policy;
243
244    return pol;
245}
246#endif
247
248static int shm_mmap(struct file * file, struct vm_area_struct * vma)
249{
250    struct shm_file_data *sfd = shm_file_data(file);
251    int ret;
252
253    ret = sfd->file->f_op->mmap(sfd->file, vma);
254    if (ret != 0)
255        return ret;
256    sfd->vm_ops = vma->vm_ops;
257#ifdef CONFIG_MMU
258    BUG_ON(!sfd->vm_ops->fault);
259#endif
260    vma->vm_ops = &shm_vm_ops;
261    shm_open(vma);
262
263    return ret;
264}
265
266static int shm_release(struct inode *ino, struct file *file)
267{
268    struct shm_file_data *sfd = shm_file_data(file);
269
270    put_ipc_ns(sfd->ns);
271    shm_file_data(file) = NULL;
272    kfree(sfd);
273    return 0;
274}
275
276static int shm_fsync(struct file *file, struct dentry *dentry, int datasync)
277{
278    int (*fsync) (struct file *, struct dentry *, int datasync);
279    struct shm_file_data *sfd = shm_file_data(file);
280    int ret = -EINVAL;
281
282    fsync = sfd->file->f_op->fsync;
283    if (fsync)
284        ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync);
285    return ret;
286}
287
288static unsigned long shm_get_unmapped_area(struct file *file,
289    unsigned long addr, unsigned long len, unsigned long pgoff,
290    unsigned long flags)
291{
292    struct shm_file_data *sfd = shm_file_data(file);
293    return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
294                        pgoff, flags);
295}
296
297static const struct file_operations shm_file_operations = {
298    .mmap = shm_mmap,
299    .fsync = shm_fsync,
300    .release = shm_release,
301#ifndef CONFIG_MMU
302    .get_unmapped_area = shm_get_unmapped_area,
303#endif
304};
305
306static const struct file_operations shm_file_operations_huge = {
307    .mmap = shm_mmap,
308    .fsync = shm_fsync,
309    .release = shm_release,
310    .get_unmapped_area = shm_get_unmapped_area,
311};
312
313int is_file_shm_hugepages(struct file *file)
314{
315    return file->f_op == &shm_file_operations_huge;
316}
317
318static const struct vm_operations_struct shm_vm_ops = {
319    .open = shm_open, /* callback for a new vm-area open */
320    .close = shm_close, /* callback for when the vm-area is released */
321    .fault = shm_fault,
322#if defined(CONFIG_NUMA)
323    .set_policy = shm_set_policy,
324    .get_policy = shm_get_policy,
325#endif
326};
327
328/**
329 * newseg - Create a new shared memory segment
330 * @ns: namespace
331 * @params: ptr to the structure that contains key, size and shmflg
332 *
333 * Called with shm_ids.rw_mutex held as a writer.
 *
 * Returns the id of the new segment (shp->shm_perm.id) on success, or a
 * negative errno: -EINVAL if size is outside [SHMMIN, ns->shm_ctlmax],
 * -ENOSPC if the namespace page total would exceed ns->shm_ctlall,
 * -ENOMEM if the descriptor cannot be allocated, or whatever error
 * security_shm_alloc(), the backing-file setup or ipc_addid() reports.
334 */
335
336static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
337{
338    key_t key = params->key;
339    int shmflg = params->flg;
340    size_t size = params->u.size;
341    int error;
342    struct shmid_kernel *shp;
    /* segment size in pages, rounded up */
343    int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
344    struct file * file;
    /* "SYSV" + 8 hex digits + NUL */
345    char name[13];
346    int id;
347    int acctflag = 0;
348
    /* enforce the per-segment and per-namespace limits */
349    if (size < SHMMIN || size > ns->shm_ctlmax)
350        return -EINVAL;
351
352    if (ns->shm_tot + numpages > ns->shm_ctlall)
353        return -ENOSPC;
354
355    shp = ipc_rcu_alloc(sizeof(*shp));
356    if (!shp)
357        return -ENOMEM;
358
    /* only the permission bits of shmflg are stored in the mode */
359    shp->shm_perm.key = key;
360    shp->shm_perm.mode = (shmflg & S_IRWXUGO);
361    shp->mlock_user = NULL;
362
363    shp->shm_perm.security = NULL;
364    error = security_shm_alloc(shp);
365    if (error) {
366        ipc_rcu_putref(shp);
367        return error;
368    }
369
    /* create the backing file: hugetlbfs for SHM_HUGETLB, shmem otherwise */
370    sprintf (name, "SYSV%08x", key);
371    if (shmflg & SHM_HUGETLB) {
372        /* hugetlb_file_setup applies strict accounting */
373        if (shmflg & SHM_NORESERVE)
374            acctflag = VM_NORESERVE;
375        file = hugetlb_file_setup(name, size, acctflag,
376                    &shp->mlock_user, HUGETLB_SHMFS_INODE);
377    } else {
378        /*
379         * Do not allow no accounting for OVERCOMMIT_NEVER, even
380         * if it's asked for.
381         */
382        if ((shmflg & SHM_NORESERVE) &&
383                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
384            acctflag = VM_NORESERVE;
385        file = shmem_file_setup(name, size, acctflag);
386    }
387    error = PTR_ERR(file);
388    if (IS_ERR(file))
389        goto no_file;
390
    /*
     * Publish the segment in the id table.  The shm_unlock() on the success
     * path below suggests ipc_addid() returns with shp locked -- confirm
     * against ipc/util.c.
     */
391    id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
392    if (id < 0) {
393        error = id;
394        goto no_id;
395    }
396
397    shp->shm_cprid = task_tgid_vnr(current);
398    shp->shm_lprid = 0;
399    shp->shm_atim = shp->shm_dtim = 0;
400    shp->shm_ctim = get_seconds();
401    shp->shm_segsz = size;
402    shp->shm_nattch = 0;
403    shp->shm_file = file;
404    /*
405     * shmid gets reported as "inode#" in /proc/pid/maps.
406     * proc-ps tools use this. Changing this will break them.
407     */
408    file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
409
    /* account the new pages and hand the id back to the caller */
410    ns->shm_tot += numpages;
411    error = shp->shm_perm.id;
412    shm_unlock(shp);
413    return error;
414
    /* error unwinding, in reverse order of acquisition */
415no_id:
416    if (is_file_hugepages(file) && shp->mlock_user)
417        user_shm_unlock(size, shp->mlock_user);
418    fput(file);
419no_file:
420    security_shm_free(shp);
421    ipc_rcu_putref(shp);
422    return error;
423}
424
425/*
426 * Called with shm_ids.rw_mutex and ipcp locked.
427 */
428static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
429{
430    struct shmid_kernel *shp;
431
432    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
433    return security_shm_associate(shp, shmflg);
434}
435
436/*
437 * Called with shm_ids.rw_mutex and ipcp locked.
438 */
439static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
440                struct ipc_params *params)
441{
442    struct shmid_kernel *shp;
443
444    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
445    if (shp->shm_segsz < params->u.size)
446        return -EINVAL;
447
448    return 0;
449}
450
451SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
452{
453    struct ipc_namespace *ns;
454    struct ipc_ops shm_ops;
455    struct ipc_params shm_params;
456
457    ns = current->nsproxy->ipc_ns;
458
459    shm_ops.getnew = newseg;
460    shm_ops.associate = shm_security;
461    shm_ops.more_checks = shm_more_checks;
462
463    shm_params.key = key;
464    shm_params.flg = shmflg;
465    shm_params.u.size = size;
466
467    return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
468}
469
470static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
471{
472    switch(version) {
473    case IPC_64:
474        return copy_to_user(buf, in, sizeof(*in));
475    case IPC_OLD:
476        {
477        struct shmid_ds out;
478
479        ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
480        out.shm_segsz = in->shm_segsz;
481        out.shm_atime = in->shm_atime;
482        out.shm_dtime = in->shm_dtime;
483        out.shm_ctime = in->shm_ctime;
484        out.shm_cpid = in->shm_cpid;
485        out.shm_lpid = in->shm_lpid;
486        out.shm_nattch = in->shm_nattch;
487
488        return copy_to_user(buf, &out, sizeof(out));
489        }
490    default:
491        return -EINVAL;
492    }
493}
494
495static inline unsigned long
496copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
497{
498    switch(version) {
499    case IPC_64:
500        if (copy_from_user(out, buf, sizeof(*out)))
501            return -EFAULT;
502        return 0;
503    case IPC_OLD:
504        {
505        struct shmid_ds tbuf_old;
506
507        if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
508            return -EFAULT;
509
510        out->shm_perm.uid = tbuf_old.shm_perm.uid;
511        out->shm_perm.gid = tbuf_old.shm_perm.gid;
512        out->shm_perm.mode = tbuf_old.shm_perm.mode;
513
514        return 0;
515        }
516    default:
517        return -EINVAL;
518    }
519}
520
521static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
522{
523    switch(version) {
524    case IPC_64:
525        return copy_to_user(buf, in, sizeof(*in));
526    case IPC_OLD:
527        {
528        struct shminfo out;
529
530        if(in->shmmax > INT_MAX)
531            out.shmmax = INT_MAX;
532        else
533            out.shmmax = (int)in->shmmax;
534
535        out.shmmin = in->shmmin;
536        out.shmmni = in->shmmni;
537        out.shmseg = in->shmseg;
538        out.shmall = in->shmall;
539
540        return copy_to_user(buf, &out, sizeof(out));
541        }
542    default:
543        return -EINVAL;
544    }
545}
546
547/*
548 * Called with shm_ids.rw_mutex held as a reader
549 */
550static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
551        unsigned long *swp)
552{
553    int next_id;
554    int total, in_use;
555
556    *rss = 0;
557    *swp = 0;
558
559    in_use = shm_ids(ns).in_use;
560
561    for (total = 0, next_id = 0; total < in_use; next_id++) {
562        struct kern_ipc_perm *ipc;
563        struct shmid_kernel *shp;
564        struct inode *inode;
565
566        ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
567        if (ipc == NULL)
568            continue;
569        shp = container_of(ipc, struct shmid_kernel, shm_perm);
570
571        inode = shp->shm_file->f_path.dentry->d_inode;
572
573        if (is_file_hugepages(shp->shm_file)) {
574            struct address_space *mapping = inode->i_mapping;
575            struct hstate *h = hstate_file(shp->shm_file);
576            *rss += pages_per_huge_page(h) * mapping->nrpages;
577        } else {
578#ifdef CONFIG_SHMEM
579            struct shmem_inode_info *info = SHMEM_I(inode);
580            spin_lock(&info->lock);
581            *rss += inode->i_mapping->nrpages;
582            *swp += info->swapped;
583            spin_unlock(&info->lock);
584#else
585            *rss += inode->i_mapping->nrpages;
586#endif
587        }
588
589        total++;
590    }
591}
592
593/*
594 * This function handles some shmctl commands which require the rw_mutex
595 * to be held in write mode.
596 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
597 */
598static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
599               struct shmid_ds __user *buf, int version)
600{
601    struct kern_ipc_perm *ipcp;
602    struct shmid64_ds shmid64;
603    struct shmid_kernel *shp;
604    int err;
605
606    if (cmd == IPC_SET) {
607        if (copy_shmid_from_user(&shmid64, buf, version))
608            return -EFAULT;
609    }
610
611    ipcp = ipcctl_pre_down(&shm_ids(ns), shmid, cmd, &shmid64.shm_perm, 0);
612    if (IS_ERR(ipcp))
613        return PTR_ERR(ipcp);
614
615    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
616
617    err = security_shm_shmctl(shp, cmd);
618    if (err)
619        goto out_unlock;
620    switch (cmd) {
621    case IPC_RMID:
622        do_shm_rmid(ns, ipcp);
623        goto out_up;
624    case IPC_SET:
625        ipc_update_perm(&shmid64.shm_perm, ipcp);
626        shp->shm_ctim = get_seconds();
627        break;
628    default:
629        err = -EINVAL;
630    }
631out_unlock:
632    shm_unlock(shp);
633out_up:
634    up_write(&shm_ids(ns).rw_mutex);
635    return err;
636}
637
/*
 * shmctl(2): control operations on a shared memory segment.
 *
 * IPC_INFO / SHM_INFO   report namespace-wide limits / usage to @buf and
 *                       return the value of ipc_get_maxid() (0 if negative).
 * SHM_STAT / IPC_STAT   copy the segment's description to @buf.  SHM_STAT
 *                       looks the segment up with shm_lock() and returns
 *                       its id; IPC_STAT uses shm_lock_check() and
 *                       returns 0.
 * SHM_LOCK / SHM_UNLOCK lock / unlock the segment's pages (a no-op for
 *                       hugepage-backed segments).
 * IPC_RMID / IPC_SET    handled by shmctl_down().
 *
 * Returns a negative errno on failure; -EINVAL for negative or unknown
 * arguments.
 */
638SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
639{
640    struct shmid_kernel *shp;
641    int err, version;
642    struct ipc_namespace *ns;
643
644    if (cmd < 0 || shmid < 0) {
645        err = -EINVAL;
646        goto out;
647    }
648
    /* ipc_parse_version() takes &cmd and may modify it */
649    version = ipc_parse_version(&cmd);
650    ns = current->nsproxy->ipc_ns;
651
652    switch (cmd) { /* replace with proc interface ? */
653    case IPC_INFO:
654    {
655        struct shminfo64 shminfo;
656
657        err = security_shm_shmctl(NULL, cmd);
658        if (err)
659            return err;
660
661        memset(&shminfo, 0, sizeof(shminfo));
662        shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
663        shminfo.shmmax = ns->shm_ctlmax;
664        shminfo.shmall = ns->shm_ctlall;
665
666        shminfo.shmmin = SHMMIN;
667        if(copy_shminfo_to_user (buf, &shminfo, version))
668            return -EFAULT;
669
670        down_read(&shm_ids(ns).rw_mutex);
671        err = ipc_get_maxid(&shm_ids(ns));
672        up_read(&shm_ids(ns).rw_mutex);
673
        /* a negative max id is reported to the caller as 0 */
674        if(err<0)
675            err = 0;
676        goto out;
677    }
678    case SHM_INFO:
679    {
680        struct shm_info shm_info;
681
682        err = security_shm_shmctl(NULL, cmd);
683        if (err)
684            return err;
685
686        memset(&shm_info, 0, sizeof(shm_info));
        /* gather all statistics under the reader lock, copy out after dropping it */
687        down_read(&shm_ids(ns).rw_mutex);
688        shm_info.used_ids = shm_ids(ns).in_use;
689        shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
690        shm_info.shm_tot = ns->shm_tot;
691        shm_info.swap_attempts = 0;
692        shm_info.swap_successes = 0;
693        err = ipc_get_maxid(&shm_ids(ns));
694        up_read(&shm_ids(ns).rw_mutex);
695        if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
696            err = -EFAULT;
697            goto out;
698        }
699
700        err = err < 0 ? 0 : err;
701        goto out;
702    }
703    case SHM_STAT:
704    case IPC_STAT:
705    {
706        struct shmid64_ds tbuf;
        /* value returned on success: the segment id for SHM_STAT, 0 for IPC_STAT */
707        int result;
708
709        if (cmd == SHM_STAT) {
710            shp = shm_lock(ns, shmid);
711            if (IS_ERR(shp)) {
712                err = PTR_ERR(shp);
713                goto out;
714            }
715            result = shp->shm_perm.id;
716        } else {
717            shp = shm_lock_check(ns, shmid);
718            if (IS_ERR(shp)) {
719                err = PTR_ERR(shp);
720                goto out;
721            }
722            result = 0;
723        }
724        err = -EACCES;
725        if (ipcperms (&shp->shm_perm, S_IRUGO))
726            goto out_unlock;
727        err = security_shm_shmctl(shp, cmd);
728        if (err)
729            goto out_unlock;
        /* snapshot the segment while it is locked; copy to user space after unlocking */
730        memset(&tbuf, 0, sizeof(tbuf));
731        kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
732        tbuf.shm_segsz = shp->shm_segsz;
733        tbuf.shm_atime = shp->shm_atim;
734        tbuf.shm_dtime = shp->shm_dtim;
735        tbuf.shm_ctime = shp->shm_ctim;
736        tbuf.shm_cpid = shp->shm_cprid;
737        tbuf.shm_lpid = shp->shm_lprid;
738        tbuf.shm_nattch = shp->shm_nattch;
739        shm_unlock(shp);
740        if(copy_shmid_to_user (buf, &tbuf, version))
741            err = -EFAULT;
742        else
743            err = result;
744        goto out;
745    }
746    case SHM_LOCK:
747    case SHM_UNLOCK:
748    {
749        struct file *uninitialized_var(shm_file);
750
751        lru_add_drain_all(); /* drain pagevecs to lru lists */
752
753        shp = shm_lock_check(ns, shmid);
754        if (IS_ERR(shp)) {
755            err = PTR_ERR(shp);
756            goto out;
757        }
758
759        audit_ipc_obj(&(shp->shm_perm));
760
        /*
         * Without CAP_IPC_LOCK the caller must be the segment's owner or
         * creator, and SHM_LOCK additionally needs a non-zero
         * RLIMIT_MEMLOCK.
         */
761        if (!capable(CAP_IPC_LOCK)) {
762            uid_t euid = current_euid();
763            err = -EPERM;
764            if (euid != shp->shm_perm.uid &&
765                euid != shp->shm_perm.cuid)
766                goto out_unlock;
767            if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
768                goto out_unlock;
769        }
770
771        err = security_shm_shmctl(shp, cmd);
772        if (err)
773            goto out_unlock;
774        
        /* hugepage-backed segments are left untouched by both commands */
775        if(cmd==SHM_LOCK) {
776            struct user_struct *user = current_user();
777            if (!is_file_hugepages(shp->shm_file)) {
778                err = shmem_lock(shp->shm_file, 1, user);
779                if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){
780                    shp->shm_perm.mode |= SHM_LOCKED;
781                    shp->mlock_user = user;
782                }
783            }
784        } else if (!is_file_hugepages(shp->shm_file)) {
785            shmem_lock(shp->shm_file, 0, shp->mlock_user);
786            shp->shm_perm.mode &= ~SHM_LOCKED;
787            shp->mlock_user = NULL;
788        }
789        shm_unlock(shp);
790        goto out;
791    }
792    case IPC_RMID:
793    case IPC_SET:
794        err = shmctl_down(ns, shmid, cmd, buf, version);
795        return err;
796    default:
797        return -EINVAL;
798    }
799
    /* reached only by goto, with shp locked */
800out_unlock:
801    shm_unlock(shp);
802out:
803    return err;
804}
805
806/*
807 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
808 *
809 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
810 * "raddr" thing points to kernel space, and there has to be a wrapper around
811 * this.
 *
 * @shmid:   id of the segment to attach
 * @shmaddr: requested user address, or NULL to let do_mmap() pick one
 * @shmflg:  SHM_RND, SHM_REMAP, SHM_RDONLY and SHM_EXEC are interpreted here
 * @raddr:   receives the value returned by do_mmap()
 *
 * Returns 0 on success or a negative errno.
812 */
813long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
814{
815    struct shmid_kernel *shp;
816    unsigned long addr;
817    unsigned long size;
818    struct file * file;
819    int err;
820    unsigned long flags;
821    unsigned long prot;
822    int acc_mode;
823    unsigned long user_addr;
824    struct ipc_namespace *ns;
825    struct shm_file_data *sfd;
826    struct path path;
827    fmode_t f_mode;
828
    /*
     * Validate the address: a non-NULL address must be SHMLBA aligned
     * (or is rounded down with SHM_RND; without __ARCH_FORCE_SHMLBA page
     * alignment is sufficient) and is mapped MAP_FIXED.  SHM_REMAP makes
     * no sense without an address.
     */
829    err = -EINVAL;
830    if (shmid < 0)
831        goto out;
832    else if ((addr = (ulong)shmaddr)) {
833        if (addr & (SHMLBA-1)) {
834            if (shmflg & SHM_RND)
835                addr &= ~(SHMLBA-1); /* round down */
836            else
837#ifndef __ARCH_FORCE_SHMLBA
838                if (addr & ~PAGE_MASK)
839#endif
840                    goto out;
841        }
842        flags = MAP_SHARED | MAP_FIXED;
843    } else {
844        if ((shmflg & SHM_REMAP))
845            goto out;
846
847        flags = MAP_SHARED;
848    }
849
    /* translate the attach flags into mmap protection, permission bits and file mode */
850    if (shmflg & SHM_RDONLY) {
851        prot = PROT_READ;
852        acc_mode = S_IRUGO;
853        f_mode = FMODE_READ;
854    } else {
855        prot = PROT_READ | PROT_WRITE;
856        acc_mode = S_IRUGO | S_IWUGO;
857        f_mode = FMODE_READ | FMODE_WRITE;
858    }
859    if (shmflg & SHM_EXEC) {
860        prot |= PROT_EXEC;
861        acc_mode |= S_IXUGO;
862    }
863
864    /*
865     * We cannot rely on the fs check since SYSV IPC does have an
866     * additional creator id...
867     */
868    ns = current->nsproxy->ipc_ns;
869    shp = shm_lock_check(ns, shmid);
870    if (IS_ERR(shp)) {
871        err = PTR_ERR(shp);
872        goto out;
873    }
874
875    err = -EACCES;
876    if (ipcperms(&shp->shm_perm, acc_mode))
877        goto out_unlock;
878
879    err = security_shm_shmat(shp, shmaddr, shmflg);
880    if (err)
881        goto out_unlock;
882
    /*
     * Take a temporary attach count before dropping the lock: segments are
     * only destroyed once shm_nattch reaches zero, so shp stays usable
     * below.  The count is dropped again at out_nattch.
     */
883    path = shp->shm_file->f_path;
884    path_get(&path);
885    shp->shm_nattch++;
886    size = i_size_read(path.dentry->d_inode);
887    shm_unlock(shp);
888
889    err = -ENOMEM;
890    sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
891    if (!sfd)
892        goto out_put_dentry;
893
    /* per-attach file on the same path, using the shm file operations */
894    file = alloc_file(&path, f_mode,
895              is_file_hugepages(shp->shm_file) ?
896                &shm_file_operations_huge :
897                &shm_file_operations);
898    if (!file)
899        goto out_free;
900
901    file->private_data = sfd;
902    file->f_mapping = shp->shm_file->f_mapping;
903    sfd->id = shp->shm_perm.id;
904    sfd->ns = get_ipc_ns(ns);
905    sfd->file = shp->shm_file;
906    sfd->vm_ops = NULL;
907
908    down_write(&current->mm->mmap_sem);
    /* a fixed address without SHM_REMAP must not overlap an existing mapping */
909    if (addr && !(shmflg & SHM_REMAP)) {
910        err = -EINVAL;
911        if (find_vma_intersection(current->mm, addr, addr + size))
912            goto invalid;
913        /*
914         * If shm segment goes below stack, make sure there is some
915         * space left for the stack to grow (at least 4 pages).
916         */
917        if (addr < current->mm->start_stack &&
918            addr > current->mm->start_stack - size - PAGE_SIZE * 5)
919            goto invalid;
920    }
921        
    /* note: *raddr is written even when do_mmap() returns an error value */
922    user_addr = do_mmap (file, addr, size, prot, flags, 0);
923    *raddr = user_addr;
924    err = 0;
925    if (IS_ERR_VALUE(user_addr))
926        err = (long)user_addr;
927invalid:
928    up_write(&current->mm->mmap_sem);
929
    /* drop the local reference to the per-attach file */
930    fput(file);
931
    /*
     * Drop the temporary attach count taken above (shm_mmap() -> shm_open()
     * took its own count for a successful mapping), destroying the segment
     * if that was the last attach of a segment marked SHM_DEST.
     */
932out_nattch:
933    down_write(&shm_ids(ns).rw_mutex);
934    shp = shm_lock(ns, shmid);
935    BUG_ON(IS_ERR(shp));
936    shp->shm_nattch--;
937    if(shp->shm_nattch == 0 &&
938       shp->shm_perm.mode & SHM_DEST)
939        shm_destroy(ns, shp);
940    else
941        shm_unlock(shp);
942    up_write(&shm_ids(ns).rw_mutex);
943
944out:
945    return err;
946
947out_unlock:
948    shm_unlock(shp);
949    goto out;
950
    /* failures after the temporary attach count was taken */
951out_free:
952    kfree(sfd);
953out_put_dentry:
954    path_put(&path);
955    goto out_nattch;
956}
957
958SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
959{
960    unsigned long ret;
961    long err;
962
963    err = do_shmat(shmid, shmaddr, shmflg, &ret);
964    if (err)
965        return err;
966    force_successful_syscall_return();
967    return (long)ret;
968}
969
970/*
971 * detach and kill segment if marked destroyed.
972 * The work is done in shm_close.
973 */
974SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
975{
976    struct mm_struct *mm = current->mm;
977    struct vm_area_struct *vma;
978    unsigned long addr = (unsigned long)shmaddr;
979    int retval = -EINVAL;
980#ifdef CONFIG_MMU
981    loff_t size = 0;
982    struct vm_area_struct *next;
983#endif
984
985    if (addr & ~PAGE_MASK)
986        return retval;
987
988    down_write(&mm->mmap_sem);
989
990    /*
991     * This function tries to be smart and unmap shm segments that
992     * were modified by partial mlock or munmap calls:
993     * - It first determines the size of the shm segment that should be
994     * unmapped: It searches for a vma that is backed by shm and that
995     * started at address shmaddr. It records it's size and then unmaps
996     * it.
997     * - Then it unmaps all shm vmas that started at shmaddr and that
998     * are within the initially determined size.
999     * Errors from do_munmap are ignored: the function only fails if
1000     * it's called with invalid parameters or if it's called to unmap
1001     * a part of a vma. Both calls in this function are for full vmas,
1002     * the parameters are directly copied from the vma itself and always
1003     * valid - therefore do_munmap cannot fail. (famous last words?)
1004     */
1005    /*
1006     * If it had been mremap()'d, the starting address would not
1007     * match the usual checks anyway. So assume all vma's are
1008     * above the starting address given.
1009     */
1010    vma = find_vma(mm, addr);
1011
1012#ifdef CONFIG_MMU
1013    while (vma) {
1014        next = vma->vm_next;
1015
1016        /*
1017         * Check if the starting address would match, i.e. it's
1018         * a fragment created by mprotect() and/or munmap(), or it
1019         * otherwise it starts at this address with no hassles.
1020         */
1021        if ((vma->vm_ops == &shm_vm_ops) &&
1022            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1023
1024
1025            size = vma->vm_file->f_path.dentry->d_inode->i_size;
1026            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1027            /*
1028             * We discovered the size of the shm segment, so
1029             * break out of here and fall through to the next
1030             * loop that uses the size information to stop
1031             * searching for matching vma's.
1032             */
1033            retval = 0;
1034            vma = next;
1035            break;
1036        }
1037        vma = next;
1038    }
1039
1040    /*
1041     * We need look no further than the maximum address a fragment
1042     * could possibly have landed at. Also cast things to loff_t to
1043     * prevent overflows and make comparisions vs. equal-width types.
1044     */
1045    size = PAGE_ALIGN(size);
1046    while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1047        next = vma->vm_next;
1048
1049        /* finding a matching vma now does not alter retval */
1050        if ((vma->vm_ops == &shm_vm_ops) &&
1051            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
1052
1053            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1054        vma = next;
1055    }
1056
1057#else /* CONFIG_MMU */
1058    /* under NOMMU conditions, the exact address to be destroyed must be
1059     * given */
1060    retval = -EINVAL;
1061    if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1062        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1063        retval = 0;
1064    }
1065
1066#endif
1067
1068    up_write(&mm->mmap_sem);
1069    return retval;
1070}
1071
1072#ifdef CONFIG_PROC_FS
1073static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1074{
1075    struct shmid_kernel *shp = it;
1076
1077#if BITS_PER_LONG <= 32
1078#define SIZE_SPEC "%10lu"
1079#else
1080#define SIZE_SPEC "%21lu"
1081#endif
1082
1083    return seq_printf(s,
1084              "%10d %10d %4o " SIZE_SPEC " %5u %5u "
1085              "%5lu %5u %5u %5u %5u %10lu %10lu %10lu\n",
1086              shp->shm_perm.key,
1087              shp->shm_perm.id,
1088              shp->shm_perm.mode,
1089              shp->shm_segsz,
1090              shp->shm_cprid,
1091              shp->shm_lprid,
1092              shp->shm_nattch,
1093              shp->shm_perm.uid,
1094              shp->shm_perm.gid,
1095              shp->shm_perm.cuid,
1096              shp->shm_perm.cgid,
1097              shp->shm_atim,
1098              shp->shm_dtim,
1099              shp->shm_ctim);
1100}
1101#endif
1102

Archive Download this file



interactive