Root/ipc/msg.c

Source at commit fbf123cd4cc0c097fe9a99c90109ebb2a5e94a50 created 7 years 11 months ago.
By Lars-Peter Clausen, dma: jz4740: Dequeue descriptor from active list before completing it
1/*
2 * linux/ipc/msg.c
3 * Copyright (C) 1992 Krishna Balasubramanian
4 *
5 * Removed all the remaining kerneld mess
6 * Catch the -EFAULT stuff properly
7 * Use GFP_KERNEL for messages as in 1.2
8 * Fixed up the unchecked user space derefs
9 * Copyright (C) 1998 Alan Cox & Andi Kleen
10 *
11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
12 *
13 * mostly rewritten, threaded and wake-one semantics added
14 * MSGMAX limit removed, sysctl's added
15 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
16 *
17 * support for audit of ipc object properties and permission changes
18 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
19 *
20 * namespaces support
21 * OpenVZ, SWsoft Inc.
22 * Pavel Emelianov <xemul@openvz.org>
23 */
24
25#include <linux/capability.h>
26#include <linux/msg.h>
27#include <linux/spinlock.h>
28#include <linux/init.h>
29#include <linux/mm.h>
30#include <linux/proc_fs.h>
31#include <linux/list.h>
32#include <linux/security.h>
33#include <linux/sched.h>
34#include <linux/syscalls.h>
35#include <linux/audit.h>
36#include <linux/seq_file.h>
37#include <linux/rwsem.h>
38#include <linux/nsproxy.h>
39#include <linux/ipc_namespace.h>
40
41#include <asm/current.h>
42#include <linux/uaccess.h>
43#include "util.h"
44
45/* one msg_receiver structure for each sleeping receiver */
46struct msg_receiver {
47    struct list_head r_list;
48    struct task_struct *r_tsk;
49
50    int r_mode;
51    long r_msgtype;
52    long r_maxsize;
53
54    /*
55     * Mark r_msg volatile so that the compiler
56     * does not try to get smart and optimize
57     * it. We rely on this for the lockless
58     * receive algorithm.
59     */
60    struct msg_msg *volatile r_msg;
61};
62
63/* one msg_sender for each sleeping sender */
64struct msg_sender {
65    struct list_head list;
66    struct task_struct *tsk;
67};
68
/* Message selection modes, see convert_mode() and testmsg(). */
enum {
	SEARCH_ANY		= 1,	/* accept any message */
	SEARCH_EQUAL		= 2,	/* m_type == requested type */
	SEARCH_NOTEQUAL		= 3,	/* m_type != requested type */
	SEARCH_LESSEQUAL	= 4,	/* m_type <= requested type */
	SEARCH_NUMBER		= 5,	/* select by position in the queue */
};

/* The message-queue id set of namespace @ns. */
#define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS])
76
77static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
78{
79    struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id);
80
81    if (IS_ERR(ipcp))
82        return ERR_CAST(ipcp);
83
84    return container_of(ipcp, struct msg_queue, q_perm);
85}
86
87static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
88                            int id)
89{
90    struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);
91
92    if (IS_ERR(ipcp))
93        return ERR_CAST(ipcp);
94
95    return container_of(ipcp, struct msg_queue, q_perm);
96}
97
98static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
99{
100    ipc_rmid(&msg_ids(ns), &s->q_perm);
101}
102
103static void msg_rcu_free(struct rcu_head *head)
104{
105    struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
106    struct msg_queue *msq = ipc_rcu_to_struct(p);
107
108    security_msg_queue_free(msq);
109    ipc_rcu_free(head);
110}
111
112/**
113 * newque - Create a new msg queue
114 * @ns: namespace
115 * @params: ptr to the structure that contains the key and msgflg
116 *
117 * Called with msg_ids.rwsem held (writer)
118 */
119static int newque(struct ipc_namespace *ns, struct ipc_params *params)
120{
121    struct msg_queue *msq;
122    int id, retval;
123    key_t key = params->key;
124    int msgflg = params->flg;
125
126    msq = ipc_rcu_alloc(sizeof(*msq));
127    if (!msq)
128        return -ENOMEM;
129
130    msq->q_perm.mode = msgflg & S_IRWXUGO;
131    msq->q_perm.key = key;
132
133    msq->q_perm.security = NULL;
134    retval = security_msg_queue_alloc(msq);
135    if (retval) {
136        ipc_rcu_putref(msq, ipc_rcu_free);
137        return retval;
138    }
139
140    /* ipc_addid() locks msq upon success. */
141    id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
142    if (id < 0) {
143        ipc_rcu_putref(msq, msg_rcu_free);
144        return id;
145    }
146
147    msq->q_stime = msq->q_rtime = 0;
148    msq->q_ctime = get_seconds();
149    msq->q_cbytes = msq->q_qnum = 0;
150    msq->q_qbytes = ns->msg_ctlmnb;
151    msq->q_lspid = msq->q_lrpid = 0;
152    INIT_LIST_HEAD(&msq->q_messages);
153    INIT_LIST_HEAD(&msq->q_receivers);
154    INIT_LIST_HEAD(&msq->q_senders);
155
156    ipc_unlock_object(&msq->q_perm);
157    rcu_read_unlock();
158
159    return msq->q_perm.id;
160}
161
162static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
163{
164    mss->tsk = current;
165    __set_current_state(TASK_INTERRUPTIBLE);
166    list_add_tail(&mss->list, &msq->q_senders);
167}
168
169static inline void ss_del(struct msg_sender *mss)
170{
171    if (mss->list.next != NULL)
172        list_del(&mss->list);
173}
174
175static void ss_wakeup(struct list_head *h, int kill)
176{
177    struct msg_sender *mss, *t;
178
179    list_for_each_entry_safe(mss, t, h, list) {
180        if (kill)
181            mss->list.next = NULL;
182        wake_up_process(mss->tsk);
183    }
184}
185
186static void expunge_all(struct msg_queue *msq, int res)
187{
188    struct msg_receiver *msr, *t;
189
190    list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
191        msr->r_msg = NULL; /* initialize expunge ordering */
192        wake_up_process(msr->r_tsk);
193        /*
194         * Ensure that the wakeup is visible before setting r_msg as
195         * the receiving end depends on it: either spinning on a nil,
196         * or dealing with -EAGAIN cases. See lockless receive part 1
197         * and 2 in do_msgrcv().
198         */
199        smp_mb();
200        msr->r_msg = ERR_PTR(res);
201    }
202}
203
204/*
205 * freeque() wakes up waiters on the sender and receiver waiting queue,
206 * removes the message queue from message queue ID IDR, and cleans up all the
207 * messages associated with this queue.
208 *
209 * msg_ids.rwsem (writer) and the spinlock for this message queue are held
210 * before freeque() is called. msg_ids.rwsem remains locked on exit.
211 */
212static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
213{
214    struct msg_msg *msg, *t;
215    struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
216
217    expunge_all(msq, -EIDRM);
218    ss_wakeup(&msq->q_senders, 1);
219    msg_rmid(ns, msq);
220    ipc_unlock_object(&msq->q_perm);
221    rcu_read_unlock();
222
223    list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
224        atomic_dec(&ns->msg_hdrs);
225        free_msg(msg);
226    }
227    atomic_sub(msq->q_cbytes, &ns->msg_bytes);
228    ipc_rcu_putref(msq, msg_rcu_free);
229}
230
231/*
232 * Called with msg_ids.rwsem and ipcp locked.
233 */
234static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
235{
236    struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
237
238    return security_msg_queue_associate(msq, msgflg);
239}
240
241SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
242{
243    struct ipc_namespace *ns;
244    static const struct ipc_ops msg_ops = {
245        .getnew = newque,
246        .associate = msg_security,
247    };
248    struct ipc_params msg_params;
249
250    ns = current->nsproxy->ipc_ns;
251
252    msg_params.key = key;
253    msg_params.flg = msgflg;
254
255    return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
256}
257
258static inline unsigned long
259copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
260{
261    switch (version) {
262    case IPC_64:
263        return copy_to_user(buf, in, sizeof(*in));
264    case IPC_OLD:
265    {
266        struct msqid_ds out;
267
268        memset(&out, 0, sizeof(out));
269
270        ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);
271
272        out.msg_stime = in->msg_stime;
273        out.msg_rtime = in->msg_rtime;
274        out.msg_ctime = in->msg_ctime;
275
276        if (in->msg_cbytes > USHRT_MAX)
277            out.msg_cbytes = USHRT_MAX;
278        else
279            out.msg_cbytes = in->msg_cbytes;
280        out.msg_lcbytes = in->msg_cbytes;
281
282        if (in->msg_qnum > USHRT_MAX)
283            out.msg_qnum = USHRT_MAX;
284        else
285            out.msg_qnum = in->msg_qnum;
286
287        if (in->msg_qbytes > USHRT_MAX)
288            out.msg_qbytes = USHRT_MAX;
289        else
290            out.msg_qbytes = in->msg_qbytes;
291        out.msg_lqbytes = in->msg_qbytes;
292
293        out.msg_lspid = in->msg_lspid;
294        out.msg_lrpid = in->msg_lrpid;
295
296        return copy_to_user(buf, &out, sizeof(out));
297    }
298    default:
299        return -EINVAL;
300    }
301}
302
303static inline unsigned long
304copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
305{
306    switch (version) {
307    case IPC_64:
308        if (copy_from_user(out, buf, sizeof(*out)))
309            return -EFAULT;
310        return 0;
311    case IPC_OLD:
312    {
313        struct msqid_ds tbuf_old;
314
315        if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
316            return -EFAULT;
317
318        out->msg_perm.uid = tbuf_old.msg_perm.uid;
319        out->msg_perm.gid = tbuf_old.msg_perm.gid;
320        out->msg_perm.mode = tbuf_old.msg_perm.mode;
321
322        if (tbuf_old.msg_qbytes == 0)
323            out->msg_qbytes = tbuf_old.msg_lqbytes;
324        else
325            out->msg_qbytes = tbuf_old.msg_qbytes;
326
327        return 0;
328    }
329    default:
330        return -EINVAL;
331    }
332}
333
334/*
335 * This function handles some msgctl commands which require the rwsem
336 * to be held in write mode.
337 * NOTE: no locks must be held, the rwsem is taken inside this function.
338 */
339static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
340               struct msqid_ds __user *buf, int version)
341{
342    struct kern_ipc_perm *ipcp;
343    struct msqid64_ds uninitialized_var(msqid64);
344    struct msg_queue *msq;
345    int err;
346
347    if (cmd == IPC_SET) {
348        if (copy_msqid_from_user(&msqid64, buf, version))
349            return -EFAULT;
350    }
351
352    down_write(&msg_ids(ns).rwsem);
353    rcu_read_lock();
354
355    ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
356                      &msqid64.msg_perm, msqid64.msg_qbytes);
357    if (IS_ERR(ipcp)) {
358        err = PTR_ERR(ipcp);
359        goto out_unlock1;
360    }
361
362    msq = container_of(ipcp, struct msg_queue, q_perm);
363
364    err = security_msg_queue_msgctl(msq, cmd);
365    if (err)
366        goto out_unlock1;
367
368    switch (cmd) {
369    case IPC_RMID:
370        ipc_lock_object(&msq->q_perm);
371        /* freeque unlocks the ipc object and rcu */
372        freeque(ns, ipcp);
373        goto out_up;
374    case IPC_SET:
375        if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
376            !capable(CAP_SYS_RESOURCE)) {
377            err = -EPERM;
378            goto out_unlock1;
379        }
380
381        ipc_lock_object(&msq->q_perm);
382        err = ipc_update_perm(&msqid64.msg_perm, ipcp);
383        if (err)
384            goto out_unlock0;
385
386        msq->q_qbytes = msqid64.msg_qbytes;
387
388        msq->q_ctime = get_seconds();
389        /* sleeping receivers might be excluded by
390         * stricter permissions.
391         */
392        expunge_all(msq, -EAGAIN);
393        /* sleeping senders might be able to send
394         * due to a larger queue size.
395         */
396        ss_wakeup(&msq->q_senders, 0);
397        break;
398    default:
399        err = -EINVAL;
400        goto out_unlock1;
401    }
402
403out_unlock0:
404    ipc_unlock_object(&msq->q_perm);
405out_unlock1:
406    rcu_read_unlock();
407out_up:
408    up_write(&msg_ids(ns).rwsem);
409    return err;
410}
411
412static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
413             int cmd, int version, void __user *buf)
414{
415    int err;
416    struct msg_queue *msq;
417
418    switch (cmd) {
419    case IPC_INFO:
420    case MSG_INFO:
421    {
422        struct msginfo msginfo;
423        int max_id;
424
425        if (!buf)
426            return -EFAULT;
427
428        /*
429         * We must not return kernel stack data.
430         * due to padding, it's not enough
431         * to set all member fields.
432         */
433        err = security_msg_queue_msgctl(NULL, cmd);
434        if (err)
435            return err;
436
437        memset(&msginfo, 0, sizeof(msginfo));
438        msginfo.msgmni = ns->msg_ctlmni;
439        msginfo.msgmax = ns->msg_ctlmax;
440        msginfo.msgmnb = ns->msg_ctlmnb;
441        msginfo.msgssz = MSGSSZ;
442        msginfo.msgseg = MSGSEG;
443        down_read(&msg_ids(ns).rwsem);
444        if (cmd == MSG_INFO) {
445            msginfo.msgpool = msg_ids(ns).in_use;
446            msginfo.msgmap = atomic_read(&ns->msg_hdrs);
447            msginfo.msgtql = atomic_read(&ns->msg_bytes);
448        } else {
449            msginfo.msgmap = MSGMAP;
450            msginfo.msgpool = MSGPOOL;
451            msginfo.msgtql = MSGTQL;
452        }
453        max_id = ipc_get_maxid(&msg_ids(ns));
454        up_read(&msg_ids(ns).rwsem);
455        if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
456            return -EFAULT;
457        return (max_id < 0) ? 0 : max_id;
458    }
459
460    case MSG_STAT:
461    case IPC_STAT:
462    {
463        struct msqid64_ds tbuf;
464        int success_return;
465
466        if (!buf)
467            return -EFAULT;
468
469        memset(&tbuf, 0, sizeof(tbuf));
470
471        rcu_read_lock();
472        if (cmd == MSG_STAT) {
473            msq = msq_obtain_object(ns, msqid);
474            if (IS_ERR(msq)) {
475                err = PTR_ERR(msq);
476                goto out_unlock;
477            }
478            success_return = msq->q_perm.id;
479        } else {
480            msq = msq_obtain_object_check(ns, msqid);
481            if (IS_ERR(msq)) {
482                err = PTR_ERR(msq);
483                goto out_unlock;
484            }
485            success_return = 0;
486        }
487
488        err = -EACCES;
489        if (ipcperms(ns, &msq->q_perm, S_IRUGO))
490            goto out_unlock;
491
492        err = security_msg_queue_msgctl(msq, cmd);
493        if (err)
494            goto out_unlock;
495
496        kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
497        tbuf.msg_stime = msq->q_stime;
498        tbuf.msg_rtime = msq->q_rtime;
499        tbuf.msg_ctime = msq->q_ctime;
500        tbuf.msg_cbytes = msq->q_cbytes;
501        tbuf.msg_qnum = msq->q_qnum;
502        tbuf.msg_qbytes = msq->q_qbytes;
503        tbuf.msg_lspid = msq->q_lspid;
504        tbuf.msg_lrpid = msq->q_lrpid;
505        rcu_read_unlock();
506
507        if (copy_msqid_to_user(buf, &tbuf, version))
508            return -EFAULT;
509        return success_return;
510    }
511
512    default:
513        return -EINVAL;
514    }
515
516    return err;
517out_unlock:
518    rcu_read_unlock();
519    return err;
520}
521
522SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
523{
524    int version;
525    struct ipc_namespace *ns;
526
527    if (msqid < 0 || cmd < 0)
528        return -EINVAL;
529
530    version = ipc_parse_version(&cmd);
531    ns = current->nsproxy->ipc_ns;
532
533    switch (cmd) {
534    case IPC_INFO:
535    case MSG_INFO:
536    case MSG_STAT: /* msqid is an index rather than a msg queue id */
537    case IPC_STAT:
538        return msgctl_nolock(ns, msqid, cmd, version, buf);
539    case IPC_SET:
540    case IPC_RMID:
541        return msgctl_down(ns, msqid, cmd, buf, version);
542    default:
543        return -EINVAL;
544    }
545}
546
547static int testmsg(struct msg_msg *msg, long type, int mode)
548{
549    switch (mode) {
550    case SEARCH_ANY:
551    case SEARCH_NUMBER:
552        return 1;
553    case SEARCH_LESSEQUAL:
554        if (msg->m_type <= type)
555            return 1;
556        break;
557    case SEARCH_EQUAL:
558        if (msg->m_type == type)
559            return 1;
560        break;
561    case SEARCH_NOTEQUAL:
562        if (msg->m_type != type)
563            return 1;
564        break;
565    }
566    return 0;
567}
568
569static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
570{
571    struct msg_receiver *msr, *t;
572
573    list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
574        if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
575            !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
576                           msr->r_msgtype, msr->r_mode)) {
577
578            list_del(&msr->r_list);
579            if (msr->r_maxsize < msg->m_ts) {
580                /* initialize pipelined send ordering */
581                msr->r_msg = NULL;
582                wake_up_process(msr->r_tsk);
583                smp_mb(); /* see barrier comment below */
584                msr->r_msg = ERR_PTR(-E2BIG);
585            } else {
586                msr->r_msg = NULL;
587                msq->q_lrpid = task_pid_vnr(msr->r_tsk);
588                msq->q_rtime = get_seconds();
589                wake_up_process(msr->r_tsk);
590                /*
591                 * Ensure that the wakeup is visible before
592                 * setting r_msg, as the receiving end depends
593                 * on it. See lockless receive part 1 and 2 in
594                 * do_msgrcv().
595                 */
596                smp_mb();
597                msr->r_msg = msg;
598
599                return 1;
600            }
601        }
602    }
603
604    return 0;
605}
606
607long do_msgsnd(int msqid, long mtype, void __user *mtext,
608        size_t msgsz, int msgflg)
609{
610    struct msg_queue *msq;
611    struct msg_msg *msg;
612    int err;
613    struct ipc_namespace *ns;
614
615    ns = current->nsproxy->ipc_ns;
616
617    if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
618        return -EINVAL;
619    if (mtype < 1)
620        return -EINVAL;
621
622    msg = load_msg(mtext, msgsz);
623    if (IS_ERR(msg))
624        return PTR_ERR(msg);
625
626    msg->m_type = mtype;
627    msg->m_ts = msgsz;
628
629    rcu_read_lock();
630    msq = msq_obtain_object_check(ns, msqid);
631    if (IS_ERR(msq)) {
632        err = PTR_ERR(msq);
633        goto out_unlock1;
634    }
635
636    ipc_lock_object(&msq->q_perm);
637
638    for (;;) {
639        struct msg_sender s;
640
641        err = -EACCES;
642        if (ipcperms(ns, &msq->q_perm, S_IWUGO))
643            goto out_unlock0;
644
645        /* raced with RMID? */
646        if (!ipc_valid_object(&msq->q_perm)) {
647            err = -EIDRM;
648            goto out_unlock0;
649        }
650
651        err = security_msg_queue_msgsnd(msq, msg, msgflg);
652        if (err)
653            goto out_unlock0;
654
655        if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
656                1 + msq->q_qnum <= msq->q_qbytes) {
657            break;
658        }
659
660        /* queue full, wait: */
661        if (msgflg & IPC_NOWAIT) {
662            err = -EAGAIN;
663            goto out_unlock0;
664        }
665
666        /* enqueue the sender and prepare to block */
667        ss_add(msq, &s);
668
669        if (!ipc_rcu_getref(msq)) {
670            err = -EIDRM;
671            goto out_unlock0;
672        }
673
674        ipc_unlock_object(&msq->q_perm);
675        rcu_read_unlock();
676        schedule();
677
678        rcu_read_lock();
679        ipc_lock_object(&msq->q_perm);
680
681        ipc_rcu_putref(msq, ipc_rcu_free);
682        /* raced with RMID? */
683        if (!ipc_valid_object(&msq->q_perm)) {
684            err = -EIDRM;
685            goto out_unlock0;
686        }
687
688        ss_del(&s);
689
690        if (signal_pending(current)) {
691            err = -ERESTARTNOHAND;
692            goto out_unlock0;
693        }
694
695    }
696    msq->q_lspid = task_tgid_vnr(current);
697    msq->q_stime = get_seconds();
698
699    if (!pipelined_send(msq, msg)) {
700        /* no one is waiting for this message, enqueue it */
701        list_add_tail(&msg->m_list, &msq->q_messages);
702        msq->q_cbytes += msgsz;
703        msq->q_qnum++;
704        atomic_add(msgsz, &ns->msg_bytes);
705        atomic_inc(&ns->msg_hdrs);
706    }
707
708    err = 0;
709    msg = NULL;
710
711out_unlock0:
712    ipc_unlock_object(&msq->q_perm);
713out_unlock1:
714    rcu_read_unlock();
715    if (msg != NULL)
716        free_msg(msg);
717    return err;
718}
719
720SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
721        int, msgflg)
722{
723    long mtype;
724
725    if (get_user(mtype, &msgp->mtype))
726        return -EFAULT;
727    return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
728}
729
730static inline int convert_mode(long *msgtyp, int msgflg)
731{
732    if (msgflg & MSG_COPY)
733        return SEARCH_NUMBER;
734    /*
735     * find message of correct type.
736     * msgtyp = 0 => get first.
737     * msgtyp > 0 => get first message of matching type.
738     * msgtyp < 0 => get message with least type must be < abs(msgtype).
739     */
740    if (*msgtyp == 0)
741        return SEARCH_ANY;
742    if (*msgtyp < 0) {
743        *msgtyp = -*msgtyp;
744        return SEARCH_LESSEQUAL;
745    }
746    if (msgflg & MSG_EXCEPT)
747        return SEARCH_NOTEQUAL;
748    return SEARCH_EQUAL;
749}
750
751static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
752{
753    struct msgbuf __user *msgp = dest;
754    size_t msgsz;
755
756    if (put_user(msg->m_type, &msgp->mtype))
757        return -EFAULT;
758
759    msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
760    if (store_msg(msgp->mtext, msg, msgsz))
761        return -EFAULT;
762    return msgsz;
763}
764
765#ifdef CONFIG_CHECKPOINT_RESTORE
766/*
767 * This function creates new kernel message structure, large enough to store
768 * bufsz message bytes.
769 */
770static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
771{
772    struct msg_msg *copy;
773
774    /*
775     * Create dummy message to copy real message to.
776     */
777    copy = load_msg(buf, bufsz);
778    if (!IS_ERR(copy))
779        copy->m_ts = bufsz;
780    return copy;
781}
782
783static inline void free_copy(struct msg_msg *copy)
784{
785    if (copy)
786        free_msg(copy);
787}
788#else
789static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
790{
791    return ERR_PTR(-ENOSYS);
792}
793
794static inline void free_copy(struct msg_msg *copy)
795{
796}
797#endif
798
799static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
800{
801    struct msg_msg *msg, *found = NULL;
802    long count = 0;
803
804    list_for_each_entry(msg, &msq->q_messages, m_list) {
805        if (testmsg(msg, *msgtyp, mode) &&
806            !security_msg_queue_msgrcv(msq, msg, current,
807                           *msgtyp, mode)) {
808            if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
809                *msgtyp = msg->m_type - 1;
810                found = msg;
811            } else if (mode == SEARCH_NUMBER) {
812                if (*msgtyp == count)
813                    return msg;
814            } else
815                return msg;
816            count++;
817        }
818    }
819
820    return found ?: ERR_PTR(-EAGAIN);
821}
822
823long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
824           long (*msg_handler)(void __user *, struct msg_msg *, size_t))
825{
826    int mode;
827    struct msg_queue *msq;
828    struct ipc_namespace *ns;
829    struct msg_msg *msg, *copy = NULL;
830
831    ns = current->nsproxy->ipc_ns;
832
833    if (msqid < 0 || (long) bufsz < 0)
834        return -EINVAL;
835
836    if (msgflg & MSG_COPY) {
837        if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
838            return -EINVAL;
839        copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
840        if (IS_ERR(copy))
841            return PTR_ERR(copy);
842    }
843    mode = convert_mode(&msgtyp, msgflg);
844
845    rcu_read_lock();
846    msq = msq_obtain_object_check(ns, msqid);
847    if (IS_ERR(msq)) {
848        rcu_read_unlock();
849        free_copy(copy);
850        return PTR_ERR(msq);
851    }
852
853    for (;;) {
854        struct msg_receiver msr_d;
855
856        msg = ERR_PTR(-EACCES);
857        if (ipcperms(ns, &msq->q_perm, S_IRUGO))
858            goto out_unlock1;
859
860        ipc_lock_object(&msq->q_perm);
861
862        /* raced with RMID? */
863        if (!ipc_valid_object(&msq->q_perm)) {
864            msg = ERR_PTR(-EIDRM);
865            goto out_unlock0;
866        }
867
868        msg = find_msg(msq, &msgtyp, mode);
869        if (!IS_ERR(msg)) {
870            /*
871             * Found a suitable message.
872             * Unlink it from the queue.
873             */
874            if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
875                msg = ERR_PTR(-E2BIG);
876                goto out_unlock0;
877            }
878            /*
879             * If we are copying, then do not unlink message and do
880             * not update queue parameters.
881             */
882            if (msgflg & MSG_COPY) {
883                msg = copy_msg(msg, copy);
884                goto out_unlock0;
885            }
886
887            list_del(&msg->m_list);
888            msq->q_qnum--;
889            msq->q_rtime = get_seconds();
890            msq->q_lrpid = task_tgid_vnr(current);
891            msq->q_cbytes -= msg->m_ts;
892            atomic_sub(msg->m_ts, &ns->msg_bytes);
893            atomic_dec(&ns->msg_hdrs);
894            ss_wakeup(&msq->q_senders, 0);
895
896            goto out_unlock0;
897        }
898
899        /* No message waiting. Wait for a message */
900        if (msgflg & IPC_NOWAIT) {
901            msg = ERR_PTR(-ENOMSG);
902            goto out_unlock0;
903        }
904
905        list_add_tail(&msr_d.r_list, &msq->q_receivers);
906        msr_d.r_tsk = current;
907        msr_d.r_msgtype = msgtyp;
908        msr_d.r_mode = mode;
909        if (msgflg & MSG_NOERROR)
910            msr_d.r_maxsize = INT_MAX;
911        else
912            msr_d.r_maxsize = bufsz;
913        msr_d.r_msg = ERR_PTR(-EAGAIN);
914        __set_current_state(TASK_INTERRUPTIBLE);
915
916        ipc_unlock_object(&msq->q_perm);
917        rcu_read_unlock();
918        schedule();
919
920        /* Lockless receive, part 1:
921         * Disable preemption. We don't hold a reference to the queue
922         * and getting a reference would defeat the idea of a lockless
923         * operation, thus the code relies on rcu to guarantee the
924         * existence of msq:
925         * Prior to destruction, expunge_all(-EIRDM) changes r_msg.
926         * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
927         * rcu_read_lock() prevents preemption between reading r_msg
928         * and acquiring the q_perm.lock in ipc_lock_object().
929         */
930        rcu_read_lock();
931
932        /* Lockless receive, part 2:
933         * Wait until pipelined_send or expunge_all are outside of
934         * wake_up_process(). There is a race with exit(), see
935         * ipc/mqueue.c for the details.
936         */
937        msg = (struct msg_msg *)msr_d.r_msg;
938        while (msg == NULL) {
939            cpu_relax();
940            msg = (struct msg_msg *)msr_d.r_msg;
941        }
942
943        /* Lockless receive, part 3:
944         * If there is a message or an error then accept it without
945         * locking.
946         */
947        if (msg != ERR_PTR(-EAGAIN))
948            goto out_unlock1;
949
950        /* Lockless receive, part 3:
951         * Acquire the queue spinlock.
952         */
953        ipc_lock_object(&msq->q_perm);
954
955        /* Lockless receive, part 4:
956         * Repeat test after acquiring the spinlock.
957         */
958        msg = (struct msg_msg *)msr_d.r_msg;
959        if (msg != ERR_PTR(-EAGAIN))
960            goto out_unlock0;
961
962        list_del(&msr_d.r_list);
963        if (signal_pending(current)) {
964            msg = ERR_PTR(-ERESTARTNOHAND);
965            goto out_unlock0;
966        }
967
968        ipc_unlock_object(&msq->q_perm);
969    }
970
971out_unlock0:
972    ipc_unlock_object(&msq->q_perm);
973out_unlock1:
974    rcu_read_unlock();
975    if (IS_ERR(msg)) {
976        free_copy(copy);
977        return PTR_ERR(msg);
978    }
979
980    bufsz = msg_handler(buf, msg, bufsz);
981    free_msg(msg);
982
983    return bufsz;
984}
985
986SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
987        long, msgtyp, int, msgflg)
988{
989    return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
990}
991
992/*
993 * Scale msgmni with the available lowmem size: the memory dedicated to msg
994 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
995 * Also take into account the number of nsproxies created so far.
996 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range.
997 */
998void recompute_msgmni(struct ipc_namespace *ns)
999{
1000    struct sysinfo i;
1001    unsigned long allowed;
1002    int nb_ns;
1003
1004    si_meminfo(&i);
1005    allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
1006        / MSGMNB;
1007    nb_ns = atomic_read(&nr_ipc_ns);
1008    allowed /= nb_ns;
1009
1010    if (allowed < MSGMNI) {
1011        ns->msg_ctlmni = MSGMNI;
1012        return;
1013    }
1014
1015    if (allowed > IPCMNI / nb_ns) {
1016        ns->msg_ctlmni = IPCMNI / nb_ns;
1017        return;
1018    }
1019
1020    ns->msg_ctlmni = allowed;
1021}
1022
1023void msg_init_ns(struct ipc_namespace *ns)
1024{
1025    ns->msg_ctlmax = MSGMAX;
1026    ns->msg_ctlmnb = MSGMNB;
1027
1028    recompute_msgmni(ns);
1029
1030    atomic_set(&ns->msg_bytes, 0);
1031    atomic_set(&ns->msg_hdrs, 0);
1032    ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
1033}
1034
#ifdef CONFIG_IPC_NS
/*
 * Tear down the message-queue state of namespace @ns: free every queue
 * with freeque(), then destroy the id set's IDR.
 */
void msg_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &msg_ids(ns), freeque);
	idr_destroy(&msg_ids(ns).ipcs_idr);
}
#endif
1042
#ifdef CONFIG_PROC_FS
/*
 * Print one line of /proc/sysvipc/msg for the queue passed in @it. The
 * owner and creator ids are translated into the user namespace of the
 * seq_file. Returns the seq_printf() result.
 */
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
	struct msg_queue *queue = it;
	struct user_namespace *uns = seq_user_ns(s);

	return seq_printf(s,
			"%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
			queue->q_perm.key,
			queue->q_perm.id,
			queue->q_perm.mode,
			queue->q_cbytes,
			queue->q_qnum,
			queue->q_lspid,
			queue->q_lrpid,
			from_kuid_munged(uns, queue->q_perm.uid),
			from_kgid_munged(uns, queue->q_perm.gid),
			from_kuid_munged(uns, queue->q_perm.cuid),
			from_kgid_munged(uns, queue->q_perm.cgid),
			queue->q_stime,
			queue->q_rtime,
			queue->q_ctime);
}
#endif
1067
1068void __init msg_init(void)
1069{
1070    msg_init_ns(&init_ipc_ns);
1071
1072    printk(KERN_INFO "msgmni has been set to %d\n",
1073        init_ipc_ns.msg_ctlmni);
1074
1075    ipc_init_proc_interface("sysvipc/msg",
1076                " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
1077                IPC_MSG_IDS, sysvipc_msg_proc_show);
1078}
1079

Archive Download this file



interactive