ipc/shm.c

/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 * Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Better ipc lock (kern_ipc_perm.lock) handling
 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
 */

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>

#include <asm/uaccess.h>

#include "util.h"

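/*
 * Per-attach bookkeeping, hung off file->private_data of the file that
 * backs each attach: the outer file carries the shm_* operations below,
 * while sfd->file points at the shmem or hugetlbfs file that actually
 * owns the pages.
 */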
struct shm_file_data {
    int id;
    struct ipc_namespace *ns;
    struct file *file;
    const struct vm_operations_struct *vm_ops;
};

#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS])

#define shm_unlock(shp) \
    ipc_unlock(&(shp)->shm_perm)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif

void shm_init_ns(struct ipc_namespace *ns)
{
    ns->shm_ctlmax = SHMMAX;
    ns->shm_ctlall = SHMALL;
    ns->shm_ctlmni = SHMMNI;
    ns->shm_rmid_forced = 0;
    ns->shm_tot = 0;
    ipc_init_ids(&shm_ids(ns));
}

/*
 * Called with shm_ids.rwsem (writer) and the shp structure locked.
 * Only shm_ids.rwsem remains locked on exit.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
    struct shmid_kernel *shp;
    shp = container_of(ipcp, struct shmid_kernel, shm_perm);

    if (shp->shm_nattch) {
        shp->shm_perm.mode |= SHM_DEST;
        /* Do not find it any more */
        shp->shm_perm.key = IPC_PRIVATE;
        shm_unlock(shp);
    } else
        shm_destroy(ns, shp);
}

#ifdef CONFIG_IPC_NS
void shm_exit_ns(struct ipc_namespace *ns)
{
    free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
    idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
}
#endif

static int __init ipc_ns_init(void)
{
    shm_init_ns(&init_ipc_ns);
    return 0;
}

pure_initcall(ipc_ns_init);

void __init shm_init(void)
{
    ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
                "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
#else
                "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
#endif
                IPC_SHM_IDS, sysvipc_shm_proc_show);
}

static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
{
    struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);

    if (IS_ERR(ipcp))
        return ERR_CAST(ipcp);

    return container_of(ipcp, struct shmid_kernel, shm_perm);
}

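/*
 * As shm_obtain_object(), but also validates the id's sequence number,
 * for callers whose id comes straight from userspace.
 */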
static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
{
    struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);

    if (IS_ERR(ipcp))
        return ERR_CAST(ipcp);

    return container_of(ipcp, struct shmid_kernel, shm_perm);
}

/*
 * shm_lock_(check_) routines are called in the paths where the rwsem
 * is not necessarily held.
 */
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
    struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);

    if (IS_ERR(ipcp))
        return (struct shmid_kernel *)ipcp;

    return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
    rcu_read_lock();
    ipc_lock_object(&ipcp->shm_perm);
}

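/*
 * RCU callback for ipc_rcu_putref(): release the security blob, then
 * free the ipc_rcu-allocated shmid_kernel itself.
 */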
static void shm_rcu_free(struct rcu_head *head)
{
    struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
    struct shmid_kernel *shp = ipc_rcu_to_struct(p);

    security_shm_free(shp);
    ipc_rcu_free(head);
}

static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
    ipc_rmid(&shm_ids(ns), &s->shm_perm);
}


/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);
    struct shmid_kernel *shp;

    shp = shm_lock(sfd->ns, sfd->id);
    BUG_ON(IS_ERR(shp));
    shp->shm_atim = get_seconds();
    shp->shm_lprid = task_tgid_vnr(current);
    shp->shm_nattch++;
    shm_unlock(shp);
}

/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rwsem (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
    struct file *shm_file;

    shm_file = shp->shm_file;
    shp->shm_file = NULL;
    ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
    shm_rmid(ns, shp);
    shm_unlock(shp);
    if (!is_file_hugepages(shm_file))
        shmem_lock(shm_file, 0, shp->mlock_user);
    else if (shp->mlock_user)
        user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user);
    fput(shm_file);
    ipc_rcu_putref(shp, shm_rcu_free);
}

/*
 * shm_may_destroy - identifies whether shm segment should be destroyed now
 *
 * Returns true if and only if there are no active users of the segment and
 * one of the following is true:
 *
 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
 *
 * 2) sysctl kernel.shm_rmid_forced is set to 1.
 */
static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
    return (shp->shm_nattch == 0) &&
           (ns->shm_rmid_forced ||
        (shp->shm_perm.mode & SHM_DEST));
}

/*
 * Remove the attach descriptor vma and free the memory for the segment
 * if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);
    struct shmid_kernel *shp;
    struct ipc_namespace *ns = sfd->ns;

    down_write(&shm_ids(ns).rwsem);
    /* remove from the list of attaches of the shm segment */
    shp = shm_lock(ns, sfd->id);
    BUG_ON(IS_ERR(shp));
    shp->shm_lprid = task_tgid_vnr(current);
    shp->shm_dtim = get_seconds();
    shp->shm_nattch--;
    if (shm_may_destroy(ns, shp))
        shm_destroy(ns, shp);
    else
        shm_unlock(shp);
    up_write(&shm_ids(ns).rwsem);
}

/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_current(int id, void *p, void *data)
{
    struct ipc_namespace *ns = data;
    struct kern_ipc_perm *ipcp = p;
    struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

    if (shp->shm_creator != current)
        return 0;

    /*
     * Mark it as orphaned to destroy the segment when
     * kernel.shm_rmid_forced is changed.
     * It is a noop if the following shm_may_destroy() returns true.
     */
    shp->shm_creator = NULL;

    /*
     * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
     * is not set, it shouldn't be deleted here.
     */
    if (!ns->shm_rmid_forced)
        return 0;

    if (shm_may_destroy(ns, shp)) {
        shm_lock_by_ptr(shp);
        shm_destroy(ns, shp);
    }
    return 0;
}

/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
    struct ipc_namespace *ns = data;
    struct kern_ipc_perm *ipcp = p;
    struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

    /*
     * We want to destroy segments without users and with already
     * exit'ed originating process.
     *
     * As shp->* are changed under rwsem, it's safe to skip shp locking.
     */
    if (shp->shm_creator != NULL)
        return 0;

    if (shm_may_destroy(ns, shp)) {
        shm_lock_by_ptr(shp);
        shm_destroy(ns, shp);
    }
    return 0;
}

void shm_destroy_orphaned(struct ipc_namespace *ns)
{
    down_write(&shm_ids(ns).rwsem);
    if (shm_ids(ns).in_use)
        idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
    up_write(&shm_ids(ns).rwsem);
}


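/*
 * Called on task exit: orphan every segment this task created and, if
 * kernel.shm_rmid_forced is set, destroy the ones nobody has attached.
 */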
void exit_shm(struct task_struct *task)
{
    struct ipc_namespace *ns = task->nsproxy->ipc_ns;

    if (shm_ids(ns).in_use == 0)
        return;

    /* Destroy all segments this task created that nobody has mapped yet */
    down_write(&shm_ids(ns).rwsem);
    if (shm_ids(ns).in_use)
        idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
    up_write(&shm_ids(ns).rwsem);
}

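/*
 * Page faults are forwarded to the backing file's vm_ops, captured by
 * shm_mmap() below.
 */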
static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);

    return sfd->vm_ops->fault(vma, vmf);
}

#ifdef CONFIG_NUMA
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);
    int err = 0;
    if (sfd->vm_ops->set_policy)
        err = sfd->vm_ops->set_policy(vma, new);
    return err;
}

static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
                    unsigned long addr)
{
    struct file *file = vma->vm_file;
    struct shm_file_data *sfd = shm_file_data(file);
    struct mempolicy *pol = NULL;

    if (sfd->vm_ops->get_policy)
        pol = sfd->vm_ops->get_policy(vma, addr);
    else if (vma->vm_policy)
        pol = vma->vm_policy;

    return pol;
}
#endif

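/*
 * mmap the backing file, then interpose: remember its vm_ops and
 * install shm_vm_ops instead, so that attach/detach accounting
 * (shm_open/shm_close) runs whenever a vma on the segment is opened
 * or closed.
 */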
static int shm_mmap(struct file *file, struct vm_area_struct *vma)
{
    struct shm_file_data *sfd = shm_file_data(file);
    int ret;

    ret = sfd->file->f_op->mmap(sfd->file, vma);
    if (ret != 0)
        return ret;
    sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
    BUG_ON(!sfd->vm_ops->fault);
#endif
    vma->vm_ops = &shm_vm_ops;
    shm_open(vma);

    return ret;
}

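/*
 * Last reference to the per-attach file is gone: drop the namespace
 * reference and free the shm_file_data.
 */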
static int shm_release(struct inode *ino, struct file *file)
{
    struct shm_file_data *sfd = shm_file_data(file);

    put_ipc_ns(sfd->ns);
    shm_file_data(file) = NULL;
    kfree(sfd);
    return 0;
}

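/*
 * The remaining file operations simply delegate to the backing file;
 * fsync and fallocate first check that the backing file implements
 * the operation at all.
 */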
static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
    struct shm_file_data *sfd = shm_file_data(file);

    if (!sfd->file->f_op->fsync)
        return -EINVAL;
    return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
}

static long shm_fallocate(struct file *file, int mode, loff_t offset,
              loff_t len)
{
    struct shm_file_data *sfd = shm_file_data(file);

    if (!sfd->file->f_op->fallocate)
        return -EOPNOTSUPP;
    return sfd->file->f_op->fallocate(file, mode, offset, len);
}

static unsigned long shm_get_unmapped_area(struct file *file,
    unsigned long addr, unsigned long len, unsigned long pgoff,
    unsigned long flags)
{
    struct shm_file_data *sfd = shm_file_data(file);
    return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
                        pgoff, flags);
}

static const struct file_operations shm_file_operations = {
    .mmap = shm_mmap,
    .fsync = shm_fsync,
    .release = shm_release,
#ifndef CONFIG_MMU
    .get_unmapped_area = shm_get_unmapped_area,
#endif
    .llseek = noop_llseek,
    .fallocate = shm_fallocate,
};

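/*
 * hugetlb segments always need the backing file's get_unmapped_area,
 * even on MMU systems, so that mappings come back huge-page aligned.
 */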
static const struct file_operations shm_file_operations_huge = {
    .mmap = shm_mmap,
    .fsync = shm_fsync,
    .release = shm_release,
    .get_unmapped_area = shm_get_unmapped_area,
    .llseek = noop_llseek,
    .fallocate = shm_fallocate,
};

int is_file_shm_hugepages(struct file *file)
{
    return file->f_op == &shm_file_operations_huge;
}

static const struct vm_operations_struct shm_vm_ops = {
    .open = shm_open, /* callback for a new vm-area open */
    .close = shm_close, /* callback for when the vm-area is released */
    .fault = shm_fault,
#if defined(CONFIG_NUMA)
    .set_policy = shm_set_policy,
    .get_policy = shm_get_policy,
#endif
};

/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rwsem held as a writer.
 */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
    key_t key = params->key;
    int shmflg = params->flg;
    size_t size = params->u.size;
    int error;
    struct shmid_kernel *shp;
    size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
    struct file *file;
    char name[13];
    int id;
    vm_flags_t acctflag = 0;

    if (size < SHMMIN || size > ns->shm_ctlmax)
        return -EINVAL;

    if (ns->shm_tot + numpages > ns->shm_ctlall)
        return -ENOSPC;

    shp = ipc_rcu_alloc(sizeof(*shp));
    if (!shp)
        return -ENOMEM;

    shp->shm_perm.key = key;
    shp->shm_perm.mode = (shmflg & S_IRWXUGO);
    shp->mlock_user = NULL;

    shp->shm_perm.security = NULL;
    error = security_shm_alloc(shp);
    if (error) {
        ipc_rcu_putref(shp, ipc_rcu_free);
        return error;
    }

    sprintf(name, "SYSV%08x", key);
    if (shmflg & SHM_HUGETLB) {
        struct hstate *hs;
        size_t hugesize;

        hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
        if (!hs) {
            error = -EINVAL;
            goto no_file;
        }
        hugesize = ALIGN(size, huge_page_size(hs));

        /* hugetlb_file_setup applies strict accounting */
        if (shmflg & SHM_NORESERVE)
            acctflag = VM_NORESERVE;
        file = hugetlb_file_setup(name, hugesize, acctflag,
                  &shp->mlock_user, HUGETLB_SHMFS_INODE,
                (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
    } else {
        /*
         * Do not skip accounting under OVERCOMMIT_NEVER, even
         * if SHM_NORESERVE asks for it.
         */
        if ((shmflg & SHM_NORESERVE) &&
                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
            acctflag = VM_NORESERVE;
        file = shmem_file_setup(name, size, acctflag);
    }
    error = PTR_ERR(file);
    if (IS_ERR(file))
        goto no_file;

    id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
    if (id < 0) {
        error = id;
        goto no_id;
    }

    shp->shm_cprid = task_tgid_vnr(current);
    shp->shm_lprid = 0;
    shp->shm_atim = shp->shm_dtim = 0;
    shp->shm_ctim = get_seconds();
    shp->shm_segsz = size;
    shp->shm_nattch = 0;
    shp->shm_file = file;
    shp->shm_creator = current;

    /*
     * shmid gets reported as "inode#" in /proc/pid/maps.
     * proc-ps tools use this. Changing this will break them.
     */
    file_inode(file)->i_ino = shp->shm_perm.id;

    ns->shm_tot += numpages;
    error = shp->shm_perm.id;

    ipc_unlock_object(&shp->shm_perm);
    rcu_read_unlock();
    return error;

no_id:
    if (is_file_hugepages(file) && shp->mlock_user)
        user_shm_unlock(size, shp->mlock_user);
    fput(file);
no_file:
    ipc_rcu_putref(shp, shm_rcu_free);
    return error;
}

/*
 * Called with shm_ids.rwsem and ipcp locked.
 */
static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
{
    struct shmid_kernel *shp;

    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
    return security_shm_associate(shp, shmflg);
}

/*
 * Called with shm_ids.rwsem and ipcp locked.
 */
static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
                struct ipc_params *params)
{
    struct shmid_kernel *shp;

    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
    if (shp->shm_segsz < params->u.size)
        return -EINVAL;

    return 0;
}

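/*
 * For reference, a minimal userspace sketch of the call sequence these
 * entry points implement (illustrative only; error handling omitted):
 *
 *    int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *    char *p = shmat(id, NULL, 0);
 *    p[0] = 1;                  (faults a page into the segment)
 *    shmdt(p);
 *    shmctl(id, IPC_RMID, NULL);
 */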
SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
    struct ipc_namespace *ns;
    struct ipc_ops shm_ops;
    struct ipc_params shm_params;

    ns = current->nsproxy->ipc_ns;

    shm_ops.getnew = newseg;
    shm_ops.associate = shm_security;
    shm_ops.more_checks = shm_more_checks;

    shm_params.key = key;
    shm_params.flg = shmflg;
    shm_params.u.size = size;

    return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
}

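/*
 * Translate the in-kernel shmid64_ds into whichever layout userspace
 * asked for: the modern IPC_64 format or the legacy IPC_OLD one.
 */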
static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
    switch (version) {
    case IPC_64:
        return copy_to_user(buf, in, sizeof(*in));
    case IPC_OLD:
        {
        struct shmid_ds out;

        memset(&out, 0, sizeof(out));
        ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
        out.shm_segsz = in->shm_segsz;
        out.shm_atime = in->shm_atime;
        out.shm_dtime = in->shm_dtime;
        out.shm_ctime = in->shm_ctime;
        out.shm_cpid = in->shm_cpid;
        out.shm_lpid = in->shm_lpid;
        out.shm_nattch = in->shm_nattch;

        return copy_to_user(buf, &out, sizeof(out));
        }
    default:
        return -EINVAL;
    }
}

static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
    switch (version) {
    case IPC_64:
        if (copy_from_user(out, buf, sizeof(*out)))
            return -EFAULT;
        return 0;
    case IPC_OLD:
        {
        struct shmid_ds tbuf_old;

        if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
            return -EFAULT;

        out->shm_perm.uid = tbuf_old.shm_perm.uid;
        out->shm_perm.gid = tbuf_old.shm_perm.gid;
        out->shm_perm.mode = tbuf_old.shm_perm.mode;

        return 0;
        }
    default:
        return -EINVAL;
    }
}

static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
    switch (version) {
    case IPC_64:
        return copy_to_user(buf, in, sizeof(*in));
    case IPC_OLD:
        {
        struct shminfo out;

        if (in->shmmax > INT_MAX)
            out.shmmax = INT_MAX;
        else
            out.shmmax = (int)in->shmmax;

        out.shmmin = in->shmmin;
        out.shmmni = in->shmmni;
        out.shmseg = in->shmseg;
        out.shmall = in->shmall;

        return copy_to_user(buf, &out, sizeof(out));
        }
    default:
        return -EINVAL;
    }
}

/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
    unsigned long *rss_add, unsigned long *swp_add)
{
    struct inode *inode;

    inode = file_inode(shp->shm_file);

    if (is_file_hugepages(shp->shm_file)) {
        struct address_space *mapping = inode->i_mapping;
        struct hstate *h = hstate_file(shp->shm_file);
        *rss_add += pages_per_huge_page(h) * mapping->nrpages;
    } else {
#ifdef CONFIG_SHMEM
        struct shmem_inode_info *info = SHMEM_I(inode);
        spin_lock(&info->lock);
        *rss_add += inode->i_mapping->nrpages;
        *swp_add += info->swapped;
        spin_unlock(&info->lock);
#else
        *rss_add += inode->i_mapping->nrpages;
#endif
    }
}

/*
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
        unsigned long *swp)
{
    int next_id;
    int total, in_use;

    *rss = 0;
    *swp = 0;

    in_use = shm_ids(ns).in_use;

    for (total = 0, next_id = 0; total < in_use; next_id++) {
        struct kern_ipc_perm *ipc;
        struct shmid_kernel *shp;

        ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
        if (ipc == NULL)
            continue;
        shp = container_of(ipc, struct shmid_kernel, shm_perm);

        shm_add_rss_swap(shp, rss, swp);

        total++;
    }
}

/*
 * This function handles some shmctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
               struct shmid_ds __user *buf, int version)
{
    struct kern_ipc_perm *ipcp;
    struct shmid64_ds shmid64;
    struct shmid_kernel *shp;
    int err;

    if (cmd == IPC_SET) {
        if (copy_shmid_from_user(&shmid64, buf, version))
            return -EFAULT;
    }

    down_write(&shm_ids(ns).rwsem);
    rcu_read_lock();

    ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
                      &shmid64.shm_perm, 0);
    if (IS_ERR(ipcp)) {
        err = PTR_ERR(ipcp);
        goto out_unlock1;
    }

    shp = container_of(ipcp, struct shmid_kernel, shm_perm);

    err = security_shm_shmctl(shp, cmd);
    if (err)
        goto out_unlock1;

    switch (cmd) {
    case IPC_RMID:
        ipc_lock_object(&shp->shm_perm);
        /* do_shm_rmid unlocks the ipc object and rcu */
        do_shm_rmid(ns, ipcp);
        goto out_up;
    case IPC_SET:
        ipc_lock_object(&shp->shm_perm);
        err = ipc_update_perm(&shmid64.shm_perm, ipcp);
        if (err)
            goto out_unlock0;
        shp->shm_ctim = get_seconds();
        break;
    default:
        err = -EINVAL;
        goto out_unlock1;
    }

out_unlock0:
    ipc_unlock_object(&shp->shm_perm);
out_unlock1:
    rcu_read_unlock();
out_up:
    up_write(&shm_ids(ns).rwsem);
    return err;
}

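/*
 * Handle the shmctl commands that do not need to take the ipc object
 * lock: the IPC_INFO/SHM_INFO and IPC_STAT/SHM_STAT families.
 */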
static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
             int cmd, int version, void __user *buf)
{
    int err;
    struct shmid_kernel *shp;

    /* preliminary security checks for *_INFO */
    if (cmd == IPC_INFO || cmd == SHM_INFO) {
        err = security_shm_shmctl(NULL, cmd);
        if (err)
            return err;
    }

    switch (cmd) {
    case IPC_INFO:
    {
        struct shminfo64 shminfo;

        memset(&shminfo, 0, sizeof(shminfo));
        shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
        shminfo.shmmax = ns->shm_ctlmax;
        shminfo.shmall = ns->shm_ctlall;

        shminfo.shmmin = SHMMIN;
        if (copy_shminfo_to_user(buf, &shminfo, version))
            return -EFAULT;

        down_read(&shm_ids(ns).rwsem);
        err = ipc_get_maxid(&shm_ids(ns));
        up_read(&shm_ids(ns).rwsem);

        if (err < 0)
            err = 0;
        goto out;
    }
    case SHM_INFO:
    {
        struct shm_info shm_info;

        memset(&shm_info, 0, sizeof(shm_info));
        down_read(&shm_ids(ns).rwsem);
        shm_info.used_ids = shm_ids(ns).in_use;
        shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp);
        shm_info.shm_tot = ns->shm_tot;
        shm_info.swap_attempts = 0;
        shm_info.swap_successes = 0;
        err = ipc_get_maxid(&shm_ids(ns));
        up_read(&shm_ids(ns).rwsem);
        if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
            err = -EFAULT;
            goto out;
        }

        err = err < 0 ? 0 : err;
        goto out;
    }
    case SHM_STAT:
    case IPC_STAT:
    {
        struct shmid64_ds tbuf;
        int result;

        rcu_read_lock();
        if (cmd == SHM_STAT) {
            shp = shm_obtain_object(ns, shmid);
            if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
                goto out_unlock;
            }
            result = shp->shm_perm.id;
        } else {
            shp = shm_obtain_object_check(ns, shmid);
            if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
                goto out_unlock;
            }
            result = 0;
        }

        err = -EACCES;
        if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
            goto out_unlock;

        err = security_shm_shmctl(shp, cmd);
        if (err)
            goto out_unlock;

        memset(&tbuf, 0, sizeof(tbuf));
        kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
        tbuf.shm_segsz = shp->shm_segsz;
        tbuf.shm_atime = shp->shm_atim;
        tbuf.shm_dtime = shp->shm_dtim;
        tbuf.shm_ctime = shp->shm_ctim;
        tbuf.shm_cpid = shp->shm_cprid;
        tbuf.shm_lpid = shp->shm_lprid;
        tbuf.shm_nattch = shp->shm_nattch;
        rcu_read_unlock();

        if (copy_shmid_to_user(buf, &tbuf, version))
            err = -EFAULT;
        else
            err = result;
        goto out;
    }
    default:
        return -EINVAL;
    }

out_unlock:
    rcu_read_unlock();
out:
    return err;
}

SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
    struct shmid_kernel *shp;
    int err, version;
    struct ipc_namespace *ns;

    if (cmd < 0 || shmid < 0)
        return -EINVAL;

    version = ipc_parse_version(&cmd);
    ns = current->nsproxy->ipc_ns;

    switch (cmd) {
    case IPC_INFO:
    case SHM_INFO:
    case SHM_STAT:
    case IPC_STAT:
        return shmctl_nolock(ns, shmid, cmd, version, buf);
    case IPC_RMID:
    case IPC_SET:
        return shmctl_down(ns, shmid, cmd, buf, version);
    case SHM_LOCK:
    case SHM_UNLOCK:
    {
        struct file *shm_file;

        rcu_read_lock();
        shp = shm_obtain_object_check(ns, shmid);
        if (IS_ERR(shp)) {
            err = PTR_ERR(shp);
            goto out_unlock1;
        }

        audit_ipc_obj(&(shp->shm_perm));
        err = security_shm_shmctl(shp, cmd);
        if (err)
            goto out_unlock1;

        ipc_lock_object(&shp->shm_perm);

        /* check if shm_destroy() is tearing down shp */
        if (!ipc_valid_object(&shp->shm_perm)) {
            err = -EIDRM;
            goto out_unlock0;
        }

        if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
            kuid_t euid = current_euid();
            if (!uid_eq(euid, shp->shm_perm.uid) &&
                !uid_eq(euid, shp->shm_perm.cuid)) {
                err = -EPERM;
                goto out_unlock0;
            }
            if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
                err = -EPERM;
                goto out_unlock0;
            }
        }

        shm_file = shp->shm_file;
        if (is_file_hugepages(shm_file))
            goto out_unlock0;

        if (cmd == SHM_LOCK) {
            struct user_struct *user = current_user();
            err = shmem_lock(shm_file, 1, user);
            if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
                shp->shm_perm.mode |= SHM_LOCKED;
                shp->mlock_user = user;
            }
            goto out_unlock0;
        }

        /* SHM_UNLOCK */
        if (!(shp->shm_perm.mode & SHM_LOCKED))
            goto out_unlock0;
        shmem_lock(shm_file, 0, shp->mlock_user);
        shp->shm_perm.mode &= ~SHM_LOCKED;
        shp->mlock_user = NULL;
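        /*
         * Pin the file: shmem_unlock_mapping() runs below after the
         * ipc object lock and RCU are dropped, so the segment could
         * otherwise be torn down (and the file freed) under us.
         */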
        get_file(shm_file);
        ipc_unlock_object(&shp->shm_perm);
        rcu_read_unlock();
        shmem_unlock_mapping(shm_file->f_mapping);

        fput(shm_file);
        return err;
    }
    default:
        return -EINVAL;
    }

out_unlock0:
    ipc_unlock_object(&shp->shm_perm);
out_unlock1:
    rcu_read_unlock();
    return err;
}

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
          unsigned long shmlba)
{
    struct shmid_kernel *shp;
    unsigned long addr;
    unsigned long size;
    struct file *file;
    int err;
    unsigned long flags;
    unsigned long prot;
    int acc_mode;
    struct ipc_namespace *ns;
    struct shm_file_data *sfd;
    struct path path;
    fmode_t f_mode;
    unsigned long populate = 0;

    err = -EINVAL;
    if (shmid < 0)
        goto out;
    else if ((addr = (ulong)shmaddr)) {
        if (addr & (shmlba - 1)) {
            if (shmflg & SHM_RND)
                addr &= ~(shmlba - 1); /* round down */
            else
#ifndef __ARCH_FORCE_SHMLBA
                if (addr & ~PAGE_MASK)
#endif
                    goto out;
        }
        flags = MAP_SHARED | MAP_FIXED;
    } else {
        if ((shmflg & SHM_REMAP))
            goto out;

        flags = MAP_SHARED;
    }

    if (shmflg & SHM_RDONLY) {
        prot = PROT_READ;
        acc_mode = S_IRUGO;
        f_mode = FMODE_READ;
    } else {
        prot = PROT_READ | PROT_WRITE;
        acc_mode = S_IRUGO | S_IWUGO;
        f_mode = FMODE_READ | FMODE_WRITE;
    }
    if (shmflg & SHM_EXEC) {
        prot |= PROT_EXEC;
        acc_mode |= S_IXUGO;
    }

    /*
     * We cannot rely on the fs check since SYSV IPC does have an
     * additional creator id...
     */
    ns = current->nsproxy->ipc_ns;
    rcu_read_lock();
    shp = shm_obtain_object_check(ns, shmid);
    if (IS_ERR(shp)) {
        err = PTR_ERR(shp);
        goto out_unlock;
    }

    err = -EACCES;
    if (ipcperms(ns, &shp->shm_perm, acc_mode))
        goto out_unlock;

    err = security_shm_shmat(shp, shmaddr, shmflg);
    if (err)
        goto out_unlock;

    ipc_lock_object(&shp->shm_perm);

    /* check if shm_destroy() is tearing down shp */
    if (!ipc_valid_object(&shp->shm_perm)) {
        ipc_unlock_object(&shp->shm_perm);
        err = -EIDRM;
        goto out_unlock;
    }

    path = shp->shm_file->f_path;
    path_get(&path);
    shp->shm_nattch++;
    size = i_size_read(path.dentry->d_inode);
    ipc_unlock_object(&shp->shm_perm);
    rcu_read_unlock();

    err = -ENOMEM;
    sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
    if (!sfd) {
        path_put(&path);
        goto out_nattch;
    }

    file = alloc_file(&path, f_mode,
              is_file_hugepages(shp->shm_file) ?
                &shm_file_operations_huge :
                &shm_file_operations);
    err = PTR_ERR(file);
    if (IS_ERR(file)) {
        kfree(sfd);
        path_put(&path);
        goto out_nattch;
    }

    file->private_data = sfd;
    file->f_mapping = shp->shm_file->f_mapping;
    sfd->id = shp->shm_perm.id;
    sfd->ns = get_ipc_ns(ns);
    sfd->file = shp->shm_file;
    sfd->vm_ops = NULL;

    err = security_mmap_file(file, prot, flags);
    if (err)
        goto out_fput;

    down_write(&current->mm->mmap_sem);
    if (addr && !(shmflg & SHM_REMAP)) {
        err = -EINVAL;
        if (find_vma_intersection(current->mm, addr, addr + size))
            goto invalid;
        /*
         * If shm segment goes below stack, make sure there is some
         * space left for the stack to grow (at least 4 pages).
         */
        if (addr < current->mm->start_stack &&
            addr > current->mm->start_stack - size - PAGE_SIZE * 5)
            goto invalid;
    }

    addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
    *raddr = addr;
    err = 0;
    if (IS_ERR_VALUE(addr))
        err = (long)addr;
invalid:
    up_write(&current->mm->mmap_sem);
    if (populate)
        mm_populate(addr, populate);

out_fput:
    fput(file);

out_nattch:
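    /*
     * Undo the attach count taken above; if the segment was marked for
     * destruction (IPC_RMID) while we were attaching, this is where it
     * is finally torn down.
     */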
    down_write(&shm_ids(ns).rwsem);
    shp = shm_lock(ns, shmid);
    BUG_ON(IS_ERR(shp));
    shp->shm_nattch--;
    if (shm_may_destroy(ns, shp))
        shm_destroy(ns, shp);
    else
        shm_unlock(shp);
    up_write(&shm_ids(ns).rwsem);
    return err;

out_unlock:
    rcu_read_unlock();
out:
    return err;
}

SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
    unsigned long ret;
    long err;

    err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
    if (err)
        return err;
    force_successful_syscall_return();
    return (long)ret;
}

/*
 * Detach and kill the segment if it is marked destroyed.
 * The work is done in shm_close.
 */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma;
    unsigned long addr = (unsigned long)shmaddr;
    int retval = -EINVAL;
#ifdef CONFIG_MMU
    loff_t size = 0;
    struct vm_area_struct *next;
#endif

    if (addr & ~PAGE_MASK)
        return retval;

    down_write(&mm->mmap_sem);

    /*
     * This function tries to be smart and unmap shm segments that
     * were modified by partial mlock or munmap calls:
     * - It first determines the size of the shm segment that should be
     *   unmapped: it searches for a vma that is backed by shm and that
     *   started at address shmaddr. It records its size and then unmaps
     *   it.
     * - Then it unmaps all shm vmas that started at shmaddr and that
     *   are within the initially determined size.
     * Errors from do_munmap are ignored: the function only fails if
     * it's called with invalid parameters or if it's called to unmap
     * a part of a vma. Both calls in this function are for full vmas,
     * the parameters are directly copied from the vma itself and always
     * valid - therefore do_munmap cannot fail. (famous last words?)
     */
    /*
     * If it had been mremap()'d, the starting address would not
     * match the usual checks anyway. So assume all vmas are
     * above the starting address given.
     */
    vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
    while (vma) {
        next = vma->vm_next;

        /*
         * Check if the starting address would match, i.e. it's
         * a fragment created by mprotect() and/or munmap(), or it
         * otherwise starts at this address with no hassles.
         */
        if ((vma->vm_ops == &shm_vm_ops) &&
            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

            size = file_inode(vma->vm_file)->i_size;
            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
            /*
             * We discovered the size of the shm segment, so
             * break out of here and fall through to the next
             * loop that uses the size information to stop
             * searching for matching vmas.
             */
            retval = 0;
            vma = next;
            break;
        }
        vma = next;
    }

    /*
     * We need look no further than the maximum address a fragment
     * could possibly have landed at. Also cast things to loff_t to
     * prevent overflows and make comparisons vs. equal-width types.
     */
    size = PAGE_ALIGN(size);
    while (vma && (loff_t)(vma->vm_end - addr) <= size) {
        next = vma->vm_next;

        /* finding a matching vma now does not alter retval */
        if ((vma->vm_ops == &shm_vm_ops) &&
            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
        vma = next;
    }

#else /* CONFIG_MMU */
    /* under NOMMU conditions, the exact address to be destroyed must be
     * given */
    if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
        retval = 0;
    }

#endif

    up_write(&mm->mmap_sem);
    return retval;
}


#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
    struct user_namespace *user_ns = seq_user_ns(s);
    struct shmid_kernel *shp = it;
    unsigned long rss = 0, swp = 0;

    shm_add_rss_swap(shp, &rss, &swp);

#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

    return seq_printf(s,
              "%10d %10d %4o " SIZE_SPEC " %5u %5u "
              "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
              SIZE_SPEC " " SIZE_SPEC "\n",
              shp->shm_perm.key,
              shp->shm_perm.id,
              shp->shm_perm.mode,
              shp->shm_segsz,
              shp->shm_cprid,
              shp->shm_lprid,
              shp->shm_nattch,
              from_kuid_munged(user_ns, shp->shm_perm.uid),
              from_kgid_munged(user_ns, shp->shm_perm.gid),
              from_kuid_munged(user_ns, shp->shm_perm.cuid),
              from_kgid_munged(user_ns, shp->shm_perm.cgid),
              shp->shm_atim,
              shp->shm_dtim,
              shp->shm_ctim,
              rss * PAGE_SIZE,
              swp * PAGE_SIZE);
}
#endif