Root/ipc/shm.c

Source at commit fbf123cd4cc0c097fe9a99c90109ebb2a5e94a50 created 7 years 11 months ago.
By Lars-Peter Clausen, dma: jz4740: Dequeue descriptor from active list before completing it
1/*
2 * linux/ipc/shm.c
3 * Copyright (C) 1992, 1993 Krishna Balasubramanian
4 * Many improvements/fixes by Bruno Haible.
5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7 *
8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15 *
16 * support for audit of ipc object properties and permission changes
17 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
18 *
19 * namespaces support
20 * OpenVZ, SWsoft Inc.
21 * Pavel Emelianov <xemul@openvz.org>
22 *
23 * Better ipc lock (kern_ipc_perm.lock) handling
24 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
25 */
26
27#include <linux/slab.h>
28#include <linux/mm.h>
29#include <linux/hugetlb.h>
30#include <linux/shm.h>
31#include <linux/init.h>
32#include <linux/file.h>
33#include <linux/mman.h>
34#include <linux/shmem_fs.h>
35#include <linux/security.h>
36#include <linux/syscalls.h>
37#include <linux/audit.h>
38#include <linux/capability.h>
39#include <linux/ptrace.h>
40#include <linux/seq_file.h>
41#include <linux/rwsem.h>
42#include <linux/nsproxy.h>
43#include <linux/mount.h>
44#include <linux/ipc_namespace.h>
45
46#include <linux/uaccess.h>
47
48#include "util.h"
49
/*
 * Private state kept behind a shm wrapper file's ->private_data
 * (reached through the shm_file_data() accessor).
 */
struct shm_file_data {
    /* ipc id handed to shm_lock() to find the segment again */
    int id;
    /* namespace the id is looked up in; released in shm_release() */
    struct ipc_namespace *ns;
    /* file whose f_op methods the shm_* wrappers forward to */
    struct file *file;
    /* vm_ops installed by the forwarded ->mmap(), saved by shm_mmap() */
    const struct vm_operations_struct *vm_ops;
};
56
57#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
58
59static const struct file_operations shm_file_operations;
60static const struct vm_operations_struct shm_vm_ops;
61
62#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS])
63
64#define shm_unlock(shp) \
65    ipc_unlock(&(shp)->shm_perm)
66
67static int newseg(struct ipc_namespace *, struct ipc_params *);
68static void shm_open(struct vm_area_struct *vma);
69static void shm_close(struct vm_area_struct *vma);
70static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
71#ifdef CONFIG_PROC_FS
72static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
73#endif
74
75void shm_init_ns(struct ipc_namespace *ns)
76{
77    ns->shm_ctlmax = SHMMAX;
78    ns->shm_ctlall = SHMALL;
79    ns->shm_ctlmni = SHMMNI;
80    ns->shm_rmid_forced = 0;
81    ns->shm_tot = 0;
82    ipc_init_ids(&shm_ids(ns));
83}
84
85/*
86 * Called with shm_ids.rwsem (writer) and the shp structure locked.
87 * Only shm_ids.rwsem remains locked on exit.
88 */
89static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
90{
91    struct shmid_kernel *shp;
92    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
93
94    if (shp->shm_nattch) {
95        shp->shm_perm.mode |= SHM_DEST;
96        /* Do not find it any more */
97        shp->shm_perm.key = IPC_PRIVATE;
98        shm_unlock(shp);
99    } else
100        shm_destroy(ns, shp);
101}
102
#ifdef CONFIG_IPC_NS
/*
 * shm_exit_ns - release the shm state of an ipc namespace
 * @ns: namespace being torn down
 *
 * Runs do_shm_rmid() over the table via free_ipcs(), then destroys the
 * idr that backed the table.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
    free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
    idr_destroy(&shm_ids(ns).ipcs_idr);
}
#endif
110
111static int __init ipc_ns_init(void)
112{
113    shm_init_ns(&init_ipc_ns);
114    return 0;
115}
116
117pure_initcall(ipc_ns_init);
118
119void __init shm_init(void)
120{
121    ipc_init_proc_interface("sysvipc/shm",
122#if BITS_PER_LONG <= 32
123                " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
124#else
125                " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
126#endif
127                IPC_SHM_IDS, sysvipc_shm_proc_show);
128}
129
130static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
131{
132    struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);
133
134    if (IS_ERR(ipcp))
135        return ERR_CAST(ipcp);
136
137    return container_of(ipcp, struct shmid_kernel, shm_perm);
138}
139
140static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
141{
142    struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
143
144    if (IS_ERR(ipcp))
145        return ERR_CAST(ipcp);
146
147    return container_of(ipcp, struct shmid_kernel, shm_perm);
148}
149
150/*
151 * shm_lock_(check_) routines are called in the paths where the rwsem
152 * is not necessarily held.
153 */
154static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
155{
156    struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
157
158    if (IS_ERR(ipcp))
159        return (struct shmid_kernel *)ipcp;
160
161    return container_of(ipcp, struct shmid_kernel, shm_perm);
162}
163
164static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
165{
166    rcu_read_lock();
167    ipc_lock_object(&ipcp->shm_perm);
168}
169
170static void shm_rcu_free(struct rcu_head *head)
171{
172    struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
173    struct shmid_kernel *shp = ipc_rcu_to_struct(p);
174
175    security_shm_free(shp);
176    ipc_rcu_free(head);
177}
178
179static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
180{
181    ipc_rmid(&shm_ids(ns), &s->shm_perm);
182}
183
184
185/* This is called by fork, once for every shm attach. */
186static void shm_open(struct vm_area_struct *vma)
187{
188    struct file *file = vma->vm_file;
189    struct shm_file_data *sfd = shm_file_data(file);
190    struct shmid_kernel *shp;
191
192    shp = shm_lock(sfd->ns, sfd->id);
193    BUG_ON(IS_ERR(shp));
194    shp->shm_atim = get_seconds();
195    shp->shm_lprid = task_tgid_vnr(current);
196    shp->shm_nattch++;
197    shm_unlock(shp);
198}
199
200/*
201 * shm_destroy - free the struct shmid_kernel
202 *
203 * @ns: namespace
204 * @shp: struct to free
205 *
206 * It has to be called with shp and shm_ids.rwsem (writer) locked,
207 * but returns with shp unlocked and freed.
208 */
209static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
210{
211    struct file *shm_file;
212
213    shm_file = shp->shm_file;
214    shp->shm_file = NULL;
215    ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
216    shm_rmid(ns, shp);
217    shm_unlock(shp);
218    if (!is_file_hugepages(shm_file))
219        shmem_lock(shm_file, 0, shp->mlock_user);
220    else if (shp->mlock_user)
221        user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user);
222    fput(shm_file);
223    ipc_rcu_putref(shp, shm_rcu_free);
224}
225
226/*
227 * shm_may_destroy - identifies whether shm segment should be destroyed now
228 *
229 * Returns true if and only if there are no active users of the segment and
230 * one of the following is true:
231 *
232 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
233 *
234 * 2) sysctl kernel.shm_rmid_forced is set to 1.
235 */
236static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
237{
238    return (shp->shm_nattch == 0) &&
239           (ns->shm_rmid_forced ||
240        (shp->shm_perm.mode & SHM_DEST));
241}
242
243/*
244 * remove the attach descriptor vma.
245 * free memory for segment if it is marked destroyed.
246 * The descriptor has already been removed from the current->mm->mmap list
247 * and will later be kfree()d.
248 */
249static void shm_close(struct vm_area_struct *vma)
250{
251    struct file *file = vma->vm_file;
252    struct shm_file_data *sfd = shm_file_data(file);
253    struct shmid_kernel *shp;
254    struct ipc_namespace *ns = sfd->ns;
255
256    down_write(&shm_ids(ns).rwsem);
257    /* remove from the list of attaches of the shm segment */
258    shp = shm_lock(ns, sfd->id);
259    BUG_ON(IS_ERR(shp));
260    shp->shm_lprid = task_tgid_vnr(current);
261    shp->shm_dtim = get_seconds();
262    shp->shm_nattch--;
263    if (shm_may_destroy(ns, shp))
264        shm_destroy(ns, shp);
265    else
266        shm_unlock(shp);
267    up_write(&shm_ids(ns).rwsem);
268}
269
270/* Called with ns->shm_ids(ns).rwsem locked */
271static int shm_try_destroy_current(int id, void *p, void *data)
272{
273    struct ipc_namespace *ns = data;
274    struct kern_ipc_perm *ipcp = p;
275    struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
276
277    if (shp->shm_creator != current)
278        return 0;
279
280    /*
281     * Mark it as orphaned to destroy the segment when
282     * kernel.shm_rmid_forced is changed.
283     * It is noop if the following shm_may_destroy() returns true.
284     */
285    shp->shm_creator = NULL;
286
287    /*
288     * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
289     * is not set, it shouldn't be deleted here.
290     */
291    if (!ns->shm_rmid_forced)
292        return 0;
293
294    if (shm_may_destroy(ns, shp)) {
295        shm_lock_by_ptr(shp);
296        shm_destroy(ns, shp);
297    }
298    return 0;
299}
300
301/* Called with ns->shm_ids(ns).rwsem locked */
302static int shm_try_destroy_orphaned(int id, void *p, void *data)
303{
304    struct ipc_namespace *ns = data;
305    struct kern_ipc_perm *ipcp = p;
306    struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
307
308    /*
309     * We want to destroy segments without users and with already
310     * exit'ed originating process.
311     *
312     * As shp->* are changed under rwsem, it's safe to skip shp locking.
313     */
314    if (shp->shm_creator != NULL)
315        return 0;
316
317    if (shm_may_destroy(ns, shp)) {
318        shm_lock_by_ptr(shp);
319        shm_destroy(ns, shp);
320    }
321    return 0;
322}
323
324void shm_destroy_orphaned(struct ipc_namespace *ns)
325{
326    down_write(&shm_ids(ns).rwsem);
327    if (shm_ids(ns).in_use)
328        idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
329    up_write(&shm_ids(ns).rwsem);
330}
331
332
333void exit_shm(struct task_struct *task)
334{
335    struct ipc_namespace *ns = task->nsproxy->ipc_ns;
336
337    if (shm_ids(ns).in_use == 0)
338        return;
339
340    /* Destroy all already created segments, but not mapped yet */
341    down_write(&shm_ids(ns).rwsem);
342    if (shm_ids(ns).in_use)
343        idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
344    up_write(&shm_ids(ns).rwsem);
345}
346
347static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
348{
349    struct file *file = vma->vm_file;
350    struct shm_file_data *sfd = shm_file_data(file);
351
352    return sfd->vm_ops->fault(vma, vmf);
353}
354
355#ifdef CONFIG_NUMA
356static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
357{
358    struct file *file = vma->vm_file;
359    struct shm_file_data *sfd = shm_file_data(file);
360    int err = 0;
361    if (sfd->vm_ops->set_policy)
362        err = sfd->vm_ops->set_policy(vma, new);
363    return err;
364}
365
366static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
367                    unsigned long addr)
368{
369    struct file *file = vma->vm_file;
370    struct shm_file_data *sfd = shm_file_data(file);
371    struct mempolicy *pol = NULL;
372
373    if (sfd->vm_ops->get_policy)
374        pol = sfd->vm_ops->get_policy(vma, addr);
375    else if (vma->vm_policy)
376        pol = vma->vm_policy;
377
378    return pol;
379}
380#endif
381
382static int shm_mmap(struct file *file, struct vm_area_struct *vma)
383{
384    struct shm_file_data *sfd = shm_file_data(file);
385    int ret;
386
387    ret = sfd->file->f_op->mmap(sfd->file, vma);
388    if (ret != 0)
389        return ret;
390    sfd->vm_ops = vma->vm_ops;
391#ifdef CONFIG_MMU
392    BUG_ON(!sfd->vm_ops->fault);
393#endif
394    vma->vm_ops = &shm_vm_ops;
395    shm_open(vma);
396
397    return ret;
398}
399
400static int shm_release(struct inode *ino, struct file *file)
401{
402    struct shm_file_data *sfd = shm_file_data(file);
403
404    put_ipc_ns(sfd->ns);
405    shm_file_data(file) = NULL;
406    kfree(sfd);
407    return 0;
408}
409
410static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
411{
412    struct shm_file_data *sfd = shm_file_data(file);
413
414    if (!sfd->file->f_op->fsync)
415        return -EINVAL;
416    return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
417}
418
419static long shm_fallocate(struct file *file, int mode, loff_t offset,
420              loff_t len)
421{
422    struct shm_file_data *sfd = shm_file_data(file);
423
424    if (!sfd->file->f_op->fallocate)
425        return -EOPNOTSUPP;
426    return sfd->file->f_op->fallocate(file, mode, offset, len);
427}
428
429static unsigned long shm_get_unmapped_area(struct file *file,
430    unsigned long addr, unsigned long len, unsigned long pgoff,
431    unsigned long flags)
432{
433    struct shm_file_data *sfd = shm_file_data(file);
434    return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
435                        pgoff, flags);
436}
437
438static const struct file_operations shm_file_operations = {
439    .mmap = shm_mmap,
440    .fsync = shm_fsync,
441    .release = shm_release,
442#ifndef CONFIG_MMU
443    .get_unmapped_area = shm_get_unmapped_area,
444#endif
445    .llseek = noop_llseek,
446    .fallocate = shm_fallocate,
447};
448
449static const struct file_operations shm_file_operations_huge = {
450    .mmap = shm_mmap,
451    .fsync = shm_fsync,
452    .release = shm_release,
453    .get_unmapped_area = shm_get_unmapped_area,
454    .llseek = noop_llseek,
455    .fallocate = shm_fallocate,
456};
457
458int is_file_shm_hugepages(struct file *file)
459{
460    return file->f_op == &shm_file_operations_huge;
461}
462
463static const struct vm_operations_struct shm_vm_ops = {
464    .open = shm_open, /* callback for a new vm-area open */
465    .close = shm_close, /* callback for when the vm-area is released */
466    .fault = shm_fault,
467#if defined(CONFIG_NUMA)
468    .set_policy = shm_set_policy,
469    .get_policy = shm_get_policy,
470#endif
471};
472
473/**
474 * newseg - Create a new shared memory segment
475 * @ns: namespace
476 * @params: ptr to the structure that contains key, size and shmflg
477 *
478 * Called with shm_ids.rwsem held as a writer.
479 */
480static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
481{
482    key_t key = params->key;
483    int shmflg = params->flg;
484    size_t size = params->u.size;
485    int error;
486    struct shmid_kernel *shp;
487    size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
488    struct file *file;
489    char name[13];
490    int id;
491    vm_flags_t acctflag = 0;
492
493    if (size < SHMMIN || size > ns->shm_ctlmax)
494        return -EINVAL;
495
496    if (numpages << PAGE_SHIFT < size)
497        return -ENOSPC;
498
499    if (ns->shm_tot + numpages < ns->shm_tot ||
500            ns->shm_tot + numpages > ns->shm_ctlall)
501        return -ENOSPC;
502
503    shp = ipc_rcu_alloc(sizeof(*shp));
504    if (!shp)
505        return -ENOMEM;
506
507    shp->shm_perm.key = key;
508    shp->shm_perm.mode = (shmflg & S_IRWXUGO);
509    shp->mlock_user = NULL;
510
511    shp->shm_perm.security = NULL;
512    error = security_shm_alloc(shp);
513    if (error) {
514        ipc_rcu_putref(shp, ipc_rcu_free);
515        return error;
516    }
517
518    sprintf(name, "SYSV%08x", key);
519    if (shmflg & SHM_HUGETLB) {
520        struct hstate *hs;
521        size_t hugesize;
522
523        hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
524        if (!hs) {
525            error = -EINVAL;
526            goto no_file;
527        }
528        hugesize = ALIGN(size, huge_page_size(hs));
529
530        /* hugetlb_file_setup applies strict accounting */
531        if (shmflg & SHM_NORESERVE)
532            acctflag = VM_NORESERVE;
533        file = hugetlb_file_setup(name, hugesize, acctflag,
534                  &shp->mlock_user, HUGETLB_SHMFS_INODE,
535                (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
536    } else {
537        /*
538         * Do not allow no accounting for OVERCOMMIT_NEVER, even
539         * if it's asked for.
540         */
541        if ((shmflg & SHM_NORESERVE) &&
542                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
543            acctflag = VM_NORESERVE;
544        file = shmem_file_setup(name, size, acctflag);
545    }
546    error = PTR_ERR(file);
547    if (IS_ERR(file))
548        goto no_file;
549
550    id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
551    if (id < 0) {
552        error = id;
553        goto no_id;
554    }
555
556    shp->shm_cprid = task_tgid_vnr(current);
557    shp->shm_lprid = 0;
558    shp->shm_atim = shp->shm_dtim = 0;
559    shp->shm_ctim = get_seconds();
560    shp->shm_segsz = size;
561    shp->shm_nattch = 0;
562    shp->shm_file = file;
563    shp->shm_creator = current;
564
565    /*
566     * shmid gets reported as "inode#" in /proc/pid/maps.
567     * proc-ps tools use this. Changing this will break them.
568     */
569    file_inode(file)->i_ino = shp->shm_perm.id;
570
571    ns->shm_tot += numpages;
572    error = shp->shm_perm.id;
573
574    ipc_unlock_object(&shp->shm_perm);
575    rcu_read_unlock();
576    return error;
577
578no_id:
579    if (is_file_hugepages(file) && shp->mlock_user)
580        user_shm_unlock(size, shp->mlock_user);
581    fput(file);
582no_file:
583    ipc_rcu_putref(shp, shm_rcu_free);
584    return error;
585}
586
587/*
588 * Called with shm_ids.rwsem and ipcp locked.
589 */
590static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
591{
592    struct shmid_kernel *shp;
593
594    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
595    return security_shm_associate(shp, shmflg);
596}
597
598/*
599 * Called with shm_ids.rwsem and ipcp locked.
600 */
601static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
602                struct ipc_params *params)
603{
604    struct shmid_kernel *shp;
605
606    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
607    if (shp->shm_segsz < params->u.size)
608        return -EINVAL;
609
610    return 0;
611}
612
613SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
614{
615    struct ipc_namespace *ns;
616    static const struct ipc_ops shm_ops = {
617        .getnew = newseg,
618        .associate = shm_security,
619        .more_checks = shm_more_checks,
620    };
621    struct ipc_params shm_params;
622
623    ns = current->nsproxy->ipc_ns;
624
625    shm_params.key = key;
626    shm_params.flg = shmflg;
627    shm_params.u.size = size;
628
629    return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
630}
631
632static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
633{
634    switch (version) {
635    case IPC_64:
636        return copy_to_user(buf, in, sizeof(*in));
637    case IPC_OLD:
638        {
639        struct shmid_ds out;
640
641        memset(&out, 0, sizeof(out));
642        ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
643        out.shm_segsz = in->shm_segsz;
644        out.shm_atime = in->shm_atime;
645        out.shm_dtime = in->shm_dtime;
646        out.shm_ctime = in->shm_ctime;
647        out.shm_cpid = in->shm_cpid;
648        out.shm_lpid = in->shm_lpid;
649        out.shm_nattch = in->shm_nattch;
650
651        return copy_to_user(buf, &out, sizeof(out));
652        }
653    default:
654        return -EINVAL;
655    }
656}
657
658static inline unsigned long
659copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
660{
661    switch (version) {
662    case IPC_64:
663        if (copy_from_user(out, buf, sizeof(*out)))
664            return -EFAULT;
665        return 0;
666    case IPC_OLD:
667        {
668        struct shmid_ds tbuf_old;
669
670        if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
671            return -EFAULT;
672
673        out->shm_perm.uid = tbuf_old.shm_perm.uid;
674        out->shm_perm.gid = tbuf_old.shm_perm.gid;
675        out->shm_perm.mode = tbuf_old.shm_perm.mode;
676
677        return 0;
678        }
679    default:
680        return -EINVAL;
681    }
682}
683
684static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
685{
686    switch (version) {
687    case IPC_64:
688        return copy_to_user(buf, in, sizeof(*in));
689    case IPC_OLD:
690        {
691        struct shminfo out;
692
693        if (in->shmmax > INT_MAX)
694            out.shmmax = INT_MAX;
695        else
696            out.shmmax = (int)in->shmmax;
697
698        out.shmmin = in->shmmin;
699        out.shmmni = in->shmmni;
700        out.shmseg = in->shmseg;
701        out.shmall = in->shmall;
702
703        return copy_to_user(buf, &out, sizeof(out));
704        }
705    default:
706        return -EINVAL;
707    }
708}
709
710/*
711 * Calculate and add used RSS and swap pages of a shm.
712 * Called with shm_ids.rwsem held as a reader
713 */
714static void shm_add_rss_swap(struct shmid_kernel *shp,
715    unsigned long *rss_add, unsigned long *swp_add)
716{
717    struct inode *inode;
718
719    inode = file_inode(shp->shm_file);
720
721    if (is_file_hugepages(shp->shm_file)) {
722        struct address_space *mapping = inode->i_mapping;
723        struct hstate *h = hstate_file(shp->shm_file);
724        *rss_add += pages_per_huge_page(h) * mapping->nrpages;
725    } else {
726#ifdef CONFIG_SHMEM
727        struct shmem_inode_info *info = SHMEM_I(inode);
728        spin_lock(&info->lock);
729        *rss_add += inode->i_mapping->nrpages;
730        *swp_add += info->swapped;
731        spin_unlock(&info->lock);
732#else
733        *rss_add += inode->i_mapping->nrpages;
734#endif
735    }
736}
737
738/*
739 * Called with shm_ids.rwsem held as a reader
740 */
741static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
742        unsigned long *swp)
743{
744    int next_id;
745    int total, in_use;
746
747    *rss = 0;
748    *swp = 0;
749
750    in_use = shm_ids(ns).in_use;
751
752    for (total = 0, next_id = 0; total < in_use; next_id++) {
753        struct kern_ipc_perm *ipc;
754        struct shmid_kernel *shp;
755
756        ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
757        if (ipc == NULL)
758            continue;
759        shp = container_of(ipc, struct shmid_kernel, shm_perm);
760
761        shm_add_rss_swap(shp, rss, swp);
762
763        total++;
764    }
765}
766
767/*
768 * This function handles some shmctl commands which require the rwsem
769 * to be held in write mode.
770 * NOTE: no locks must be held, the rwsem is taken inside this function.
771 */
772static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
773               struct shmid_ds __user *buf, int version)
774{
775    struct kern_ipc_perm *ipcp;
776    struct shmid64_ds shmid64;
777    struct shmid_kernel *shp;
778    int err;
779
780    if (cmd == IPC_SET) {
781        if (copy_shmid_from_user(&shmid64, buf, version))
782            return -EFAULT;
783    }
784
785    down_write(&shm_ids(ns).rwsem);
786    rcu_read_lock();
787
788    ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
789                      &shmid64.shm_perm, 0);
790    if (IS_ERR(ipcp)) {
791        err = PTR_ERR(ipcp);
792        goto out_unlock1;
793    }
794
795    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
796
797    err = security_shm_shmctl(shp, cmd);
798    if (err)
799        goto out_unlock1;
800
801    switch (cmd) {
802    case IPC_RMID:
803        ipc_lock_object(&shp->shm_perm);
804        /* do_shm_rmid unlocks the ipc object and rcu */
805        do_shm_rmid(ns, ipcp);
806        goto out_up;
807    case IPC_SET:
808        ipc_lock_object(&shp->shm_perm);
809        err = ipc_update_perm(&shmid64.shm_perm, ipcp);
810        if (err)
811            goto out_unlock0;
812        shp->shm_ctim = get_seconds();
813        break;
814    default:
815        err = -EINVAL;
816        goto out_unlock1;
817    }
818
819out_unlock0:
820    ipc_unlock_object(&shp->shm_perm);
821out_unlock1:
822    rcu_read_unlock();
823out_up:
824    up_write(&shm_ids(ns).rwsem);
825    return err;
826}
827
828static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
829             int cmd, int version, void __user *buf)
830{
831    int err;
832    struct shmid_kernel *shp;
833
834    /* preliminary security checks for *_INFO */
835    if (cmd == IPC_INFO || cmd == SHM_INFO) {
836        err = security_shm_shmctl(NULL, cmd);
837        if (err)
838            return err;
839    }
840
841    switch (cmd) {
842    case IPC_INFO:
843    {
844        struct shminfo64 shminfo;
845
846        memset(&shminfo, 0, sizeof(shminfo));
847        shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
848        shminfo.shmmax = ns->shm_ctlmax;
849        shminfo.shmall = ns->shm_ctlall;
850
851        shminfo.shmmin = SHMMIN;
852        if (copy_shminfo_to_user(buf, &shminfo, version))
853            return -EFAULT;
854
855        down_read(&shm_ids(ns).rwsem);
856        err = ipc_get_maxid(&shm_ids(ns));
857        up_read(&shm_ids(ns).rwsem);
858
859        if (err < 0)
860            err = 0;
861        goto out;
862    }
863    case SHM_INFO:
864    {
865        struct shm_info shm_info;
866
867        memset(&shm_info, 0, sizeof(shm_info));
868        down_read(&shm_ids(ns).rwsem);
869        shm_info.used_ids = shm_ids(ns).in_use;
870        shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp);
871        shm_info.shm_tot = ns->shm_tot;
872        shm_info.swap_attempts = 0;
873        shm_info.swap_successes = 0;
874        err = ipc_get_maxid(&shm_ids(ns));
875        up_read(&shm_ids(ns).rwsem);
876        if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
877            err = -EFAULT;
878            goto out;
879        }
880
881        err = err < 0 ? 0 : err;
882        goto out;
883    }
884    case SHM_STAT:
885    case IPC_STAT:
886    {
887        struct shmid64_ds tbuf;
888        int result;
889
890        rcu_read_lock();
891        if (cmd == SHM_STAT) {
892            shp = shm_obtain_object(ns, shmid);
893            if (IS_ERR(shp)) {
894                err = PTR_ERR(shp);
895                goto out_unlock;
896            }
897            result = shp->shm_perm.id;
898        } else {
899            shp = shm_obtain_object_check(ns, shmid);
900            if (IS_ERR(shp)) {
901                err = PTR_ERR(shp);
902                goto out_unlock;
903            }
904            result = 0;
905        }
906
907        err = -EACCES;
908        if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
909            goto out_unlock;
910
911        err = security_shm_shmctl(shp, cmd);
912        if (err)
913            goto out_unlock;
914
915        memset(&tbuf, 0, sizeof(tbuf));
916        kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
917        tbuf.shm_segsz = shp->shm_segsz;
918        tbuf.shm_atime = shp->shm_atim;
919        tbuf.shm_dtime = shp->shm_dtim;
920        tbuf.shm_ctime = shp->shm_ctim;
921        tbuf.shm_cpid = shp->shm_cprid;
922        tbuf.shm_lpid = shp->shm_lprid;
923        tbuf.shm_nattch = shp->shm_nattch;
924        rcu_read_unlock();
925
926        if (copy_shmid_to_user(buf, &tbuf, version))
927            err = -EFAULT;
928        else
929            err = result;
930        goto out;
931    }
932    default:
933        return -EINVAL;
934    }
935
936out_unlock:
937    rcu_read_unlock();
938out:
939    return err;
940}
941
942SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
943{
944    struct shmid_kernel *shp;
945    int err, version;
946    struct ipc_namespace *ns;
947
948    if (cmd < 0 || shmid < 0)
949        return -EINVAL;
950
951    version = ipc_parse_version(&cmd);
952    ns = current->nsproxy->ipc_ns;
953
954    switch (cmd) {
955    case IPC_INFO:
956    case SHM_INFO:
957    case SHM_STAT:
958    case IPC_STAT:
959        return shmctl_nolock(ns, shmid, cmd, version, buf);
960    case IPC_RMID:
961    case IPC_SET:
962        return shmctl_down(ns, shmid, cmd, buf, version);
963    case SHM_LOCK:
964    case SHM_UNLOCK:
965    {
966        struct file *shm_file;
967
968        rcu_read_lock();
969        shp = shm_obtain_object_check(ns, shmid);
970        if (IS_ERR(shp)) {
971            err = PTR_ERR(shp);
972            goto out_unlock1;
973        }
974
975        audit_ipc_obj(&(shp->shm_perm));
976        err = security_shm_shmctl(shp, cmd);
977        if (err)
978            goto out_unlock1;
979
980        ipc_lock_object(&shp->shm_perm);
981
982        /* check if shm_destroy() is tearing down shp */
983        if (!ipc_valid_object(&shp->shm_perm)) {
984            err = -EIDRM;
985            goto out_unlock0;
986        }
987
988        if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
989            kuid_t euid = current_euid();
990            if (!uid_eq(euid, shp->shm_perm.uid) &&
991                !uid_eq(euid, shp->shm_perm.cuid)) {
992                err = -EPERM;
993                goto out_unlock0;
994            }
995            if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
996                err = -EPERM;
997                goto out_unlock0;
998            }
999        }
1000
1001        shm_file = shp->shm_file;
1002        if (is_file_hugepages(shm_file))
1003            goto out_unlock0;
1004
1005        if (cmd == SHM_LOCK) {
1006            struct user_struct *user = current_user();
1007            err = shmem_lock(shm_file, 1, user);
1008            if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
1009                shp->shm_perm.mode |= SHM_LOCKED;
1010                shp->mlock_user = user;
1011            }
1012            goto out_unlock0;
1013        }
1014
1015        /* SHM_UNLOCK */
1016        if (!(shp->shm_perm.mode & SHM_LOCKED))
1017            goto out_unlock0;
1018        shmem_lock(shm_file, 0, shp->mlock_user);
1019        shp->shm_perm.mode &= ~SHM_LOCKED;
1020        shp->mlock_user = NULL;
1021        get_file(shm_file);
1022        ipc_unlock_object(&shp->shm_perm);
1023        rcu_read_unlock();
1024        shmem_unlock_mapping(shm_file->f_mapping);
1025
1026        fput(shm_file);
1027        return err;
1028    }
1029    default:
1030        return -EINVAL;
1031    }
1032
1033out_unlock0:
1034    ipc_unlock_object(&shp->shm_perm);
1035out_unlock1:
1036    rcu_read_unlock();
1037    return err;
1038}
1039
1040/*
1041 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1042 *
1043 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1044 * "raddr" thing points to kernel space, and there has to be a wrapper around
1045 * this.
1046 */
1047long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
1048          unsigned long shmlba)
1049{
1050    struct shmid_kernel *shp;
1051    unsigned long addr;
1052    unsigned long size;
1053    struct file *file;
1054    int err;
1055    unsigned long flags;
1056    unsigned long prot;
1057    int acc_mode;
1058    struct ipc_namespace *ns;
1059    struct shm_file_data *sfd;
1060    struct path path;
1061    fmode_t f_mode;
1062    unsigned long populate = 0;
1063
1064    err = -EINVAL;
1065    if (shmid < 0)
1066        goto out;
1067    else if ((addr = (ulong)shmaddr)) {
1068        if (addr & (shmlba - 1)) {
1069            if (shmflg & SHM_RND)
1070                addr &= ~(shmlba - 1); /* round down */
1071            else
1072#ifndef __ARCH_FORCE_SHMLBA
1073                if (addr & ~PAGE_MASK)
1074#endif
1075                    goto out;
1076        }
1077        flags = MAP_SHARED | MAP_FIXED;
1078    } else {
1079        if ((shmflg & SHM_REMAP))
1080            goto out;
1081
1082        flags = MAP_SHARED;
1083    }
1084
1085    if (shmflg & SHM_RDONLY) {
1086        prot = PROT_READ;
1087        acc_mode = S_IRUGO;
1088        f_mode = FMODE_READ;
1089    } else {
1090        prot = PROT_READ | PROT_WRITE;
1091        acc_mode = S_IRUGO | S_IWUGO;
1092        f_mode = FMODE_READ | FMODE_WRITE;
1093    }
1094    if (shmflg & SHM_EXEC) {
1095        prot |= PROT_EXEC;
1096        acc_mode |= S_IXUGO;
1097    }
1098
1099    /*
1100     * We cannot rely on the fs check since SYSV IPC does have an
1101     * additional creator id...
1102     */
1103    ns = current->nsproxy->ipc_ns;
1104    rcu_read_lock();
1105    shp = shm_obtain_object_check(ns, shmid);
1106    if (IS_ERR(shp)) {
1107        err = PTR_ERR(shp);
1108        goto out_unlock;
1109    }
1110
1111    err = -EACCES;
1112    if (ipcperms(ns, &shp->shm_perm, acc_mode))
1113        goto out_unlock;
1114
1115    err = security_shm_shmat(shp, shmaddr, shmflg);
1116    if (err)
1117        goto out_unlock;
1118
1119    ipc_lock_object(&shp->shm_perm);
1120
1121    /* check if shm_destroy() is tearing down shp */
1122    if (!ipc_valid_object(&shp->shm_perm)) {
1123        ipc_unlock_object(&shp->shm_perm);
1124        err = -EIDRM;
1125        goto out_unlock;
1126    }
1127
1128    path = shp->shm_file->f_path;
1129    path_get(&path);
1130    shp->shm_nattch++;
1131    size = i_size_read(path.dentry->d_inode);
1132    ipc_unlock_object(&shp->shm_perm);
1133    rcu_read_unlock();
1134
1135    err = -ENOMEM;
1136    sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1137    if (!sfd) {
1138        path_put(&path);
1139        goto out_nattch;
1140    }
1141
1142    file = alloc_file(&path, f_mode,
1143              is_file_hugepages(shp->shm_file) ?
1144                &shm_file_operations_huge :
1145                &shm_file_operations);
1146    err = PTR_ERR(file);
1147    if (IS_ERR(file)) {
1148        kfree(sfd);
1149        path_put(&path);
1150        goto out_nattch;
1151    }
1152
1153    file->private_data = sfd;
1154    file->f_mapping = shp->shm_file->f_mapping;
1155    sfd->id = shp->shm_perm.id;
1156    sfd->ns = get_ipc_ns(ns);
1157    sfd->file = shp->shm_file;
1158    sfd->vm_ops = NULL;
1159
1160    err = security_mmap_file(file, prot, flags);
1161    if (err)
1162        goto out_fput;
1163
1164    down_write(&current->mm->mmap_sem);
1165    if (addr && !(shmflg & SHM_REMAP)) {
1166        err = -EINVAL;
1167        if (addr + size < addr)
1168            goto invalid;
1169
1170        if (find_vma_intersection(current->mm, addr, addr + size))
1171            goto invalid;
1172        /*
1173         * If shm segment goes below stack, make sure there is some
1174         * space left for the stack to grow (at least 4 pages).
1175         */
1176        if (addr < current->mm->start_stack &&
1177            addr > current->mm->start_stack - size - PAGE_SIZE * 5)
1178            goto invalid;
1179    }
1180
1181    addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
1182    *raddr = addr;
1183    err = 0;
1184    if (IS_ERR_VALUE(addr))
1185        err = (long)addr;
1186invalid:
1187    up_write(&current->mm->mmap_sem);
1188    if (populate)
1189        mm_populate(addr, populate);
1190
1191out_fput:
1192    fput(file);
1193
1194out_nattch:
1195    down_write(&shm_ids(ns).rwsem);
1196    shp = shm_lock(ns, shmid);
1197    BUG_ON(IS_ERR(shp));
1198    shp->shm_nattch--;
1199    if (shm_may_destroy(ns, shp))
1200        shm_destroy(ns, shp);
1201    else
1202        shm_unlock(shp);
1203    up_write(&shm_ids(ns).rwsem);
1204    return err;
1205
1206out_unlock:
1207    rcu_read_unlock();
1208out:
1209    return err;
1210}
1211
1212SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1213{
1214    unsigned long ret;
1215    long err;
1216
1217    err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1218    if (err)
1219        return err;
1220    force_successful_syscall_return();
1221    return (long)ret;
1222}
1223
1224/*
1225 * detach and kill segment if marked destroyed.
1226 * The work is done in shm_close.
1227 */
1228SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1229{
1230    struct mm_struct *mm = current->mm;
1231    struct vm_area_struct *vma;
1232    unsigned long addr = (unsigned long)shmaddr;
1233    int retval = -EINVAL;
1234#ifdef CONFIG_MMU
1235    loff_t size = 0;
1236    struct vm_area_struct *next;
1237#endif
1238
1239    if (addr & ~PAGE_MASK)
1240        return retval;
1241
1242    down_write(&mm->mmap_sem);
1243
1244    /*
1245     * This function tries to be smart and unmap shm segments that
1246     * were modified by partial mlock or munmap calls:
1247     * - It first determines the size of the shm segment that should be
1248     * unmapped: It searches for a vma that is backed by shm and that
1249     * started at address shmaddr. It records it's size and then unmaps
1250     * it.
1251     * - Then it unmaps all shm vmas that started at shmaddr and that
1252     * are within the initially determined size.
1253     * Errors from do_munmap are ignored: the function only fails if
1254     * it's called with invalid parameters or if it's called to unmap
1255     * a part of a vma. Both calls in this function are for full vmas,
1256     * the parameters are directly copied from the vma itself and always
1257     * valid - therefore do_munmap cannot fail. (famous last words?)
1258     */
1259    /*
1260     * If it had been mremap()'d, the starting address would not
1261     * match the usual checks anyway. So assume all vma's are
1262     * above the starting address given.
1263     */
1264    vma = find_vma(mm, addr);
1265
1266#ifdef CONFIG_MMU
1267    while (vma) {
1268        next = vma->vm_next;
1269
1270        /*
1271         * Check if the starting address would match, i.e. it's
1272         * a fragment created by mprotect() and/or munmap(), or it
1273         * otherwise it starts at this address with no hassles.
1274         */
1275        if ((vma->vm_ops == &shm_vm_ops) &&
1276            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1277
1278
1279            size = file_inode(vma->vm_file)->i_size;
1280            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1281            /*
1282             * We discovered the size of the shm segment, so
1283             * break out of here and fall through to the next
1284             * loop that uses the size information to stop
1285             * searching for matching vma's.
1286             */
1287            retval = 0;
1288            vma = next;
1289            break;
1290        }
1291        vma = next;
1292    }
1293
1294    /*
1295     * We need look no further than the maximum address a fragment
1296     * could possibly have landed at. Also cast things to loff_t to
1297     * prevent overflows and make comparisons vs. equal-width types.
1298     */
1299    size = PAGE_ALIGN(size);
1300    while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1301        next = vma->vm_next;
1302
1303        /* finding a matching vma now does not alter retval */
1304        if ((vma->vm_ops == &shm_vm_ops) &&
1305            (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
1306
1307            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1308        vma = next;
1309    }
1310
1311#else /* CONFIG_MMU */
1312    /* under NOMMU conditions, the exact address to be destroyed must be
1313     * given */
1314    if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1315        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1316        retval = 0;
1317    }
1318
1319#endif
1320
1321    up_write(&mm->mmap_sem);
1322    return retval;
1323}
1324
#ifdef CONFIG_PROC_FS
/*
 * seq_file show callback: emit one line describing the shm segment
 * passed in via @it.
 *
 * Ids are translated into the user namespace of the seq_file, and the
 * rss/swap page counts from shm_add_rss_swap() are printed in bytes.
 *
 * Always returns 0. The seq_printf() result is deliberately not
 * propagated: a full output buffer is not an error of this callback,
 * and the seq_file core is expected to notice the overflow and retry.
 */
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct shmid_kernel *shp = it;
	unsigned long rss = 0, swp = 0;

	shm_add_rss_swap(shp, &rss, &swp);

	/* field width for the size columns depends on the width of long */
#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

	seq_printf(s,
		   "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
		   "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
		   SIZE_SPEC " " SIZE_SPEC "\n",
		   shp->shm_perm.key,
		   shp->shm_perm.id,
		   shp->shm_perm.mode,
		   shp->shm_segsz,
		   shp->shm_cprid,
		   shp->shm_lprid,
		   shp->shm_nattch,
		   from_kuid_munged(user_ns, shp->shm_perm.uid),
		   from_kgid_munged(user_ns, shp->shm_perm.gid),
		   from_kuid_munged(user_ns, shp->shm_perm.cuid),
		   from_kgid_munged(user_ns, shp->shm_perm.cgid),
		   shp->shm_atim,
		   shp->shm_dtim,
		   shp->shm_ctim,
		   rss * PAGE_SIZE,
		   swp * PAGE_SIZE);

	return 0;
}
#endif
1362

Archive Download this file



interactive