ipc/msg.c, source at commit fbf123cd4cc0c097fe9a99c90109ebb2a5e94a50 (Lars-Peter Clausen, "dma: jz4740: Dequeue descriptor from active list before completing it").
1 | /* |
2 | * linux/ipc/msg.c |
3 | * Copyright (C) 1992 Krishna Balasubramanian |
4 | * |
5 | * Removed all the remaining kerneld mess |
6 | * Catch the -EFAULT stuff properly |
7 | * Use GFP_KERNEL for messages as in 1.2 |
8 | * Fixed up the unchecked user space derefs |
9 | * Copyright (C) 1998 Alan Cox & Andi Kleen |
10 | * |
11 | * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> |
12 | * |
13 | * mostly rewritten, threaded and wake-one semantics added |
14 | * MSGMAX limit removed, sysctl's added |
15 | * (c) 1999 Manfred Spraul <manfred@colorfullife.com> |
16 | * |
17 | * support for audit of ipc object properties and permission changes |
18 | * Dustin Kirkland <dustin.kirkland@us.ibm.com> |
19 | * |
20 | * namespaces support |
21 | * OpenVZ, SWsoft Inc. |
22 | * Pavel Emelianov <xemul@openvz.org> |
23 | */ |
24 | |
25 | #include <linux/capability.h> |
26 | #include <linux/msg.h> |
27 | #include <linux/spinlock.h> |
28 | #include <linux/init.h> |
29 | #include <linux/mm.h> |
30 | #include <linux/proc_fs.h> |
31 | #include <linux/list.h> |
32 | #include <linux/security.h> |
33 | #include <linux/sched.h> |
34 | #include <linux/syscalls.h> |
35 | #include <linux/audit.h> |
36 | #include <linux/seq_file.h> |
37 | #include <linux/rwsem.h> |
38 | #include <linux/nsproxy.h> |
39 | #include <linux/ipc_namespace.h> |
40 | |
41 | #include <asm/current.h> |
42 | #include <linux/uaccess.h> |
43 | #include "util.h" |
44 | |
45 | /* one msg_receiver structure for each sleeping receiver */ |
46 | struct msg_receiver { |
47 | struct list_head r_list; |
48 | struct task_struct *r_tsk; |
49 | |
50 | int r_mode; |
51 | long r_msgtype; |
52 | long r_maxsize; |
53 | |
54 | /* |
55 | * Mark r_msg volatile so that the compiler |
56 | * does not try to get smart and optimize |
57 | * it. We rely on this for the lockless |
58 | * receive algorithm. |
59 | */ |
60 | struct msg_msg *volatile r_msg; |
61 | }; |
62 | |
63 | /* one msg_sender for each sleeping sender */ |
64 | struct msg_sender { |
65 | struct list_head list; |
66 | struct task_struct *tsk; |
67 | }; |
68 | |
69 | #define SEARCH_ANY 1 |
70 | #define SEARCH_EQUAL 2 |
71 | #define SEARCH_NOTEQUAL 3 |
72 | #define SEARCH_LESSEQUAL 4 |
73 | #define SEARCH_NUMBER 5 |
74 | |
75 | #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) |
76 | |
77 | static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) |
78 | { |
79 | struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); |
80 | |
81 | if (IS_ERR(ipcp)) |
82 | return ERR_CAST(ipcp); |
83 | |
84 | return container_of(ipcp, struct msg_queue, q_perm); |
85 | } |
86 | |
87 | static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, |
88 | int id) |
89 | { |
90 | struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); |
91 | |
92 | if (IS_ERR(ipcp)) |
93 | return ERR_CAST(ipcp); |
94 | |
95 | return container_of(ipcp, struct msg_queue, q_perm); |
96 | } |
97 | |
98 | static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) |
99 | { |
100 | ipc_rmid(&msg_ids(ns), &s->q_perm); |
101 | } |
102 | |
103 | static void msg_rcu_free(struct rcu_head *head) |
104 | { |
105 | struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); |
106 | struct msg_queue *msq = ipc_rcu_to_struct(p); |
107 | |
108 | security_msg_queue_free(msq); |
109 | ipc_rcu_free(head); |
110 | } |
111 | |
112 | /** |
113 | * newque - Create a new msg queue |
114 | * @ns: namespace |
115 | * @params: ptr to the structure that contains the key and msgflg |
116 | * |
117 | * Called with msg_ids.rwsem held (writer) |
118 | */ |
119 | static int newque(struct ipc_namespace *ns, struct ipc_params *params) |
120 | { |
121 | struct msg_queue *msq; |
122 | int id, retval; |
123 | key_t key = params->key; |
124 | int msgflg = params->flg; |
125 | |
126 | msq = ipc_rcu_alloc(sizeof(*msq)); |
127 | if (!msq) |
128 | return -ENOMEM; |
129 | |
130 | msq->q_perm.mode = msgflg & S_IRWXUGO; |
131 | msq->q_perm.key = key; |
132 | |
133 | msq->q_perm.security = NULL; |
134 | retval = security_msg_queue_alloc(msq); |
135 | if (retval) { |
136 | ipc_rcu_putref(msq, ipc_rcu_free); |
137 | return retval; |
138 | } |
139 | |
140 | /* ipc_addid() locks msq upon success. */ |
141 | id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); |
142 | if (id < 0) { |
143 | ipc_rcu_putref(msq, msg_rcu_free); |
144 | return id; |
145 | } |
146 | |
147 | msq->q_stime = msq->q_rtime = 0; |
148 | msq->q_ctime = get_seconds(); |
149 | msq->q_cbytes = msq->q_qnum = 0; |
150 | msq->q_qbytes = ns->msg_ctlmnb; |
151 | msq->q_lspid = msq->q_lrpid = 0; |
152 | INIT_LIST_HEAD(&msq->q_messages); |
153 | INIT_LIST_HEAD(&msq->q_receivers); |
154 | INIT_LIST_HEAD(&msq->q_senders); |
155 | |
156 | ipc_unlock_object(&msq->q_perm); |
157 | rcu_read_unlock(); |
158 | |
159 | return msq->q_perm.id; |
160 | } |
161 | |
162 | static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) |
163 | { |
164 | mss->tsk = current; |
165 | __set_current_state(TASK_INTERRUPTIBLE); |
166 | list_add_tail(&mss->list, &msq->q_senders); |
167 | } |
168 | |
169 | static inline void ss_del(struct msg_sender *mss) |
170 | { |
171 | if (mss->list.next != NULL) |
172 | list_del(&mss->list); |
173 | } |
174 | |
175 | static void ss_wakeup(struct list_head *h, int kill) |
176 | { |
177 | struct msg_sender *mss, *t; |
178 | |
179 | list_for_each_entry_safe(mss, t, h, list) { |
180 | if (kill) |
181 | mss->list.next = NULL; |
182 | wake_up_process(mss->tsk); |
183 | } |
184 | } |
185 | |
186 | static void expunge_all(struct msg_queue *msq, int res) |
187 | { |
188 | struct msg_receiver *msr, *t; |
189 | |
190 | list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { |
191 | msr->r_msg = NULL; /* initialize expunge ordering */ |
192 | wake_up_process(msr->r_tsk); |
193 | /* |
194 | * Ensure that the wakeup is visible before setting r_msg as |
195 | * the receiving end depends on it: either spinning on a nil, |
196 | * or dealing with -EAGAIN cases. See lockless receive part 1 |
197 | * and 2 in do_msgrcv(). |
198 | */ |
199 | smp_mb(); |
200 | msr->r_msg = ERR_PTR(res); |
201 | } |
202 | } |
203 | |
204 | /* |
205 | * freeque() wakes up waiters on the sender and receiver waiting queue, |
206 | * removes the message queue from message queue ID IDR, and cleans up all the |
207 | * messages associated with this queue. |
208 | * |
209 | * msg_ids.rwsem (writer) and the spinlock for this message queue are held |
210 | * before freeque() is called. msg_ids.rwsem remains locked on exit. |
211 | */ |
212 | static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
213 | { |
214 | struct msg_msg *msg, *t; |
215 | struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); |
216 | |
217 | expunge_all(msq, -EIDRM); |
218 | ss_wakeup(&msq->q_senders, 1); |
219 | msg_rmid(ns, msq); |
220 | ipc_unlock_object(&msq->q_perm); |
221 | rcu_read_unlock(); |
222 | |
223 | list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { |
224 | atomic_dec(&ns->msg_hdrs); |
225 | free_msg(msg); |
226 | } |
227 | atomic_sub(msq->q_cbytes, &ns->msg_bytes); |
228 | ipc_rcu_putref(msq, msg_rcu_free); |
229 | } |
230 | |
231 | /* |
232 | * Called with msg_ids.rwsem and ipcp locked. |
233 | */ |
234 | static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) |
235 | { |
236 | struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); |
237 | |
238 | return security_msg_queue_associate(msq, msgflg); |
239 | } |
240 | |
241 | SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) |
242 | { |
243 | struct ipc_namespace *ns; |
244 | static const struct ipc_ops msg_ops = { |
245 | .getnew = newque, |
246 | .associate = msg_security, |
247 | }; |
248 | struct ipc_params msg_params; |
249 | |
250 | ns = current->nsproxy->ipc_ns; |
251 | |
252 | msg_params.key = key; |
253 | msg_params.flg = msgflg; |
254 | |
255 | return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); |
256 | } |
257 | |
258 | static inline unsigned long |
259 | copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) |
260 | { |
261 | switch (version) { |
262 | case IPC_64: |
263 | return copy_to_user(buf, in, sizeof(*in)); |
264 | case IPC_OLD: |
265 | { |
266 | struct msqid_ds out; |
267 | |
268 | memset(&out, 0, sizeof(out)); |
269 | |
270 | ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); |
271 | |
272 | out.msg_stime = in->msg_stime; |
273 | out.msg_rtime = in->msg_rtime; |
274 | out.msg_ctime = in->msg_ctime; |
275 | |
276 | if (in->msg_cbytes > USHRT_MAX) |
277 | out.msg_cbytes = USHRT_MAX; |
278 | else |
279 | out.msg_cbytes = in->msg_cbytes; |
280 | out.msg_lcbytes = in->msg_cbytes; |
281 | |
282 | if (in->msg_qnum > USHRT_MAX) |
283 | out.msg_qnum = USHRT_MAX; |
284 | else |
285 | out.msg_qnum = in->msg_qnum; |
286 | |
287 | if (in->msg_qbytes > USHRT_MAX) |
288 | out.msg_qbytes = USHRT_MAX; |
289 | else |
290 | out.msg_qbytes = in->msg_qbytes; |
291 | out.msg_lqbytes = in->msg_qbytes; |
292 | |
293 | out.msg_lspid = in->msg_lspid; |
294 | out.msg_lrpid = in->msg_lrpid; |
295 | |
296 | return copy_to_user(buf, &out, sizeof(out)); |
297 | } |
298 | default: |
299 | return -EINVAL; |
300 | } |
301 | } |
302 | |
303 | static inline unsigned long |
304 | copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) |
305 | { |
306 | switch (version) { |
307 | case IPC_64: |
308 | if (copy_from_user(out, buf, sizeof(*out))) |
309 | return -EFAULT; |
310 | return 0; |
311 | case IPC_OLD: |
312 | { |
313 | struct msqid_ds tbuf_old; |
314 | |
315 | if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) |
316 | return -EFAULT; |
317 | |
318 | out->msg_perm.uid = tbuf_old.msg_perm.uid; |
319 | out->msg_perm.gid = tbuf_old.msg_perm.gid; |
320 | out->msg_perm.mode = tbuf_old.msg_perm.mode; |
321 | |
322 | if (tbuf_old.msg_qbytes == 0) |
323 | out->msg_qbytes = tbuf_old.msg_lqbytes; |
324 | else |
325 | out->msg_qbytes = tbuf_old.msg_qbytes; |
326 | |
327 | return 0; |
328 | } |
329 | default: |
330 | return -EINVAL; |
331 | } |
332 | } |
333 | |
334 | /* |
335 | * This function handles some msgctl commands which require the rwsem |
336 | * to be held in write mode. |
337 | * NOTE: no locks must be held, the rwsem is taken inside this function. |
338 | */ |
339 | static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, |
340 | struct msqid_ds __user *buf, int version) |
341 | { |
342 | struct kern_ipc_perm *ipcp; |
343 | struct msqid64_ds uninitialized_var(msqid64); |
344 | struct msg_queue *msq; |
345 | int err; |
346 | |
347 | if (cmd == IPC_SET) { |
348 | if (copy_msqid_from_user(&msqid64, buf, version)) |
349 | return -EFAULT; |
350 | } |
351 | |
352 | down_write(&msg_ids(ns).rwsem); |
353 | rcu_read_lock(); |
354 | |
355 | ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, |
356 | &msqid64.msg_perm, msqid64.msg_qbytes); |
357 | if (IS_ERR(ipcp)) { |
358 | err = PTR_ERR(ipcp); |
359 | goto out_unlock1; |
360 | } |
361 | |
362 | msq = container_of(ipcp, struct msg_queue, q_perm); |
363 | |
364 | err = security_msg_queue_msgctl(msq, cmd); |
365 | if (err) |
366 | goto out_unlock1; |
367 | |
368 | switch (cmd) { |
369 | case IPC_RMID: |
370 | ipc_lock_object(&msq->q_perm); |
371 | /* freeque unlocks the ipc object and rcu */ |
372 | freeque(ns, ipcp); |
373 | goto out_up; |
374 | case IPC_SET: |
375 | if (msqid64.msg_qbytes > ns->msg_ctlmnb && |
376 | !capable(CAP_SYS_RESOURCE)) { |
377 | err = -EPERM; |
378 | goto out_unlock1; |
379 | } |
380 | |
381 | ipc_lock_object(&msq->q_perm); |
382 | err = ipc_update_perm(&msqid64.msg_perm, ipcp); |
383 | if (err) |
384 | goto out_unlock0; |
385 | |
386 | msq->q_qbytes = msqid64.msg_qbytes; |
387 | |
388 | msq->q_ctime = get_seconds(); |
389 | /* sleeping receivers might be excluded by |
390 | * stricter permissions. |
391 | */ |
392 | expunge_all(msq, -EAGAIN); |
393 | /* sleeping senders might be able to send |
394 | * due to a larger queue size. |
395 | */ |
396 | ss_wakeup(&msq->q_senders, 0); |
397 | break; |
398 | default: |
399 | err = -EINVAL; |
400 | goto out_unlock1; |
401 | } |
402 | |
403 | out_unlock0: |
404 | ipc_unlock_object(&msq->q_perm); |
405 | out_unlock1: |
406 | rcu_read_unlock(); |
407 | out_up: |
408 | up_write(&msg_ids(ns).rwsem); |
409 | return err; |
410 | } |
411 | |
412 | static int msgctl_nolock(struct ipc_namespace *ns, int msqid, |
413 | int cmd, int version, void __user *buf) |
414 | { |
415 | int err; |
416 | struct msg_queue *msq; |
417 | |
418 | switch (cmd) { |
419 | case IPC_INFO: |
420 | case MSG_INFO: |
421 | { |
422 | struct msginfo msginfo; |
423 | int max_id; |
424 | |
425 | if (!buf) |
426 | return -EFAULT; |
427 | |
428 | /* |
429 | * We must not return kernel stack data. |
430 | * due to padding, it's not enough |
431 | * to set all member fields. |
432 | */ |
433 | err = security_msg_queue_msgctl(NULL, cmd); |
434 | if (err) |
435 | return err; |
436 | |
437 | memset(&msginfo, 0, sizeof(msginfo)); |
438 | msginfo.msgmni = ns->msg_ctlmni; |
439 | msginfo.msgmax = ns->msg_ctlmax; |
440 | msginfo.msgmnb = ns->msg_ctlmnb; |
441 | msginfo.msgssz = MSGSSZ; |
442 | msginfo.msgseg = MSGSEG; |
443 | down_read(&msg_ids(ns).rwsem); |
444 | if (cmd == MSG_INFO) { |
445 | msginfo.msgpool = msg_ids(ns).in_use; |
446 | msginfo.msgmap = atomic_read(&ns->msg_hdrs); |
447 | msginfo.msgtql = atomic_read(&ns->msg_bytes); |
448 | } else { |
449 | msginfo.msgmap = MSGMAP; |
450 | msginfo.msgpool = MSGPOOL; |
451 | msginfo.msgtql = MSGTQL; |
452 | } |
453 | max_id = ipc_get_maxid(&msg_ids(ns)); |
454 | up_read(&msg_ids(ns).rwsem); |
455 | if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) |
456 | return -EFAULT; |
457 | return (max_id < 0) ? 0 : max_id; |
458 | } |
459 | |
460 | case MSG_STAT: |
461 | case IPC_STAT: |
462 | { |
463 | struct msqid64_ds tbuf; |
464 | int success_return; |
465 | |
466 | if (!buf) |
467 | return -EFAULT; |
468 | |
469 | memset(&tbuf, 0, sizeof(tbuf)); |
470 | |
471 | rcu_read_lock(); |
472 | if (cmd == MSG_STAT) { |
473 | msq = msq_obtain_object(ns, msqid); |
474 | if (IS_ERR(msq)) { |
475 | err = PTR_ERR(msq); |
476 | goto out_unlock; |
477 | } |
478 | success_return = msq->q_perm.id; |
479 | } else { |
480 | msq = msq_obtain_object_check(ns, msqid); |
481 | if (IS_ERR(msq)) { |
482 | err = PTR_ERR(msq); |
483 | goto out_unlock; |
484 | } |
485 | success_return = 0; |
486 | } |
487 | |
488 | err = -EACCES; |
489 | if (ipcperms(ns, &msq->q_perm, S_IRUGO)) |
490 | goto out_unlock; |
491 | |
492 | err = security_msg_queue_msgctl(msq, cmd); |
493 | if (err) |
494 | goto out_unlock; |
495 | |
496 | kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); |
497 | tbuf.msg_stime = msq->q_stime; |
498 | tbuf.msg_rtime = msq->q_rtime; |
499 | tbuf.msg_ctime = msq->q_ctime; |
500 | tbuf.msg_cbytes = msq->q_cbytes; |
501 | tbuf.msg_qnum = msq->q_qnum; |
502 | tbuf.msg_qbytes = msq->q_qbytes; |
503 | tbuf.msg_lspid = msq->q_lspid; |
504 | tbuf.msg_lrpid = msq->q_lrpid; |
505 | rcu_read_unlock(); |
506 | |
507 | if (copy_msqid_to_user(buf, &tbuf, version)) |
508 | return -EFAULT; |
509 | return success_return; |
510 | } |
511 | |
512 | default: |
513 | return -EINVAL; |
514 | } |
515 | |
516 | return err; |
517 | out_unlock: |
518 | rcu_read_unlock(); |
519 | return err; |
520 | } |
521 | |
522 | SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) |
523 | { |
524 | int version; |
525 | struct ipc_namespace *ns; |
526 | |
527 | if (msqid < 0 || cmd < 0) |
528 | return -EINVAL; |
529 | |
530 | version = ipc_parse_version(&cmd); |
531 | ns = current->nsproxy->ipc_ns; |
532 | |
533 | switch (cmd) { |
534 | case IPC_INFO: |
535 | case MSG_INFO: |
536 | case MSG_STAT: /* msqid is an index rather than a msg queue id */ |
537 | case IPC_STAT: |
538 | return msgctl_nolock(ns, msqid, cmd, version, buf); |
539 | case IPC_SET: |
540 | case IPC_RMID: |
541 | return msgctl_down(ns, msqid, cmd, buf, version); |
542 | default: |
543 | return -EINVAL; |
544 | } |
545 | } |
546 | |
547 | static int testmsg(struct msg_msg *msg, long type, int mode) |
548 | { |
549 | switch (mode) { |
550 | case SEARCH_ANY: |
551 | case SEARCH_NUMBER: |
552 | return 1; |
553 | case SEARCH_LESSEQUAL: |
554 | if (msg->m_type <= type) |
555 | return 1; |
556 | break; |
557 | case SEARCH_EQUAL: |
558 | if (msg->m_type == type) |
559 | return 1; |
560 | break; |
561 | case SEARCH_NOTEQUAL: |
562 | if (msg->m_type != type) |
563 | return 1; |
564 | break; |
565 | } |
566 | return 0; |
567 | } |
568 | |
569 | static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) |
570 | { |
571 | struct msg_receiver *msr, *t; |
572 | |
573 | list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { |
574 | if (testmsg(msg, msr->r_msgtype, msr->r_mode) && |
575 | !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, |
576 | msr->r_msgtype, msr->r_mode)) { |
577 | |
578 | list_del(&msr->r_list); |
579 | if (msr->r_maxsize < msg->m_ts) { |
580 | /* initialize pipelined send ordering */ |
581 | msr->r_msg = NULL; |
582 | wake_up_process(msr->r_tsk); |
583 | smp_mb(); /* see barrier comment below */ |
584 | msr->r_msg = ERR_PTR(-E2BIG); |
585 | } else { |
586 | msr->r_msg = NULL; |
587 | msq->q_lrpid = task_pid_vnr(msr->r_tsk); |
588 | msq->q_rtime = get_seconds(); |
589 | wake_up_process(msr->r_tsk); |
590 | /* |
591 | * Ensure that the wakeup is visible before |
592 | * setting r_msg, as the receiving end depends |
593 | * on it. See lockless receive part 1 and 2 in |
594 | * do_msgrcv(). |
595 | */ |
596 | smp_mb(); |
597 | msr->r_msg = msg; |
598 | |
599 | return 1; |
600 | } |
601 | } |
602 | } |
603 | |
604 | return 0; |
605 | } |
606 | |
607 | long do_msgsnd(int msqid, long mtype, void __user *mtext, |
608 | size_t msgsz, int msgflg) |
609 | { |
610 | struct msg_queue *msq; |
611 | struct msg_msg *msg; |
612 | int err; |
613 | struct ipc_namespace *ns; |
614 | |
615 | ns = current->nsproxy->ipc_ns; |
616 | |
617 | if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) |
618 | return -EINVAL; |
619 | if (mtype < 1) |
620 | return -EINVAL; |
621 | |
622 | msg = load_msg(mtext, msgsz); |
623 | if (IS_ERR(msg)) |
624 | return PTR_ERR(msg); |
625 | |
626 | msg->m_type = mtype; |
627 | msg->m_ts = msgsz; |
628 | |
629 | rcu_read_lock(); |
630 | msq = msq_obtain_object_check(ns, msqid); |
631 | if (IS_ERR(msq)) { |
632 | err = PTR_ERR(msq); |
633 | goto out_unlock1; |
634 | } |
635 | |
636 | ipc_lock_object(&msq->q_perm); |
637 | |
638 | for (;;) { |
639 | struct msg_sender s; |
640 | |
641 | err = -EACCES; |
642 | if (ipcperms(ns, &msq->q_perm, S_IWUGO)) |
643 | goto out_unlock0; |
644 | |
645 | /* raced with RMID? */ |
646 | if (!ipc_valid_object(&msq->q_perm)) { |
647 | err = -EIDRM; |
648 | goto out_unlock0; |
649 | } |
650 | |
651 | err = security_msg_queue_msgsnd(msq, msg, msgflg); |
652 | if (err) |
653 | goto out_unlock0; |
654 | |
655 | if (msgsz + msq->q_cbytes <= msq->q_qbytes && |
656 | 1 + msq->q_qnum <= msq->q_qbytes) { |
657 | break; |
658 | } |
659 | |
660 | /* queue full, wait: */ |
661 | if (msgflg & IPC_NOWAIT) { |
662 | err = -EAGAIN; |
663 | goto out_unlock0; |
664 | } |
665 | |
666 | /* enqueue the sender and prepare to block */ |
667 | ss_add(msq, &s); |
668 | |
669 | if (!ipc_rcu_getref(msq)) { |
670 | err = -EIDRM; |
671 | goto out_unlock0; |
672 | } |
673 | |
674 | ipc_unlock_object(&msq->q_perm); |
675 | rcu_read_unlock(); |
676 | schedule(); |
677 | |
678 | rcu_read_lock(); |
679 | ipc_lock_object(&msq->q_perm); |
680 | |
681 | ipc_rcu_putref(msq, ipc_rcu_free); |
682 | /* raced with RMID? */ |
683 | if (!ipc_valid_object(&msq->q_perm)) { |
684 | err = -EIDRM; |
685 | goto out_unlock0; |
686 | } |
687 | |
688 | ss_del(&s); |
689 | |
690 | if (signal_pending(current)) { |
691 | err = -ERESTARTNOHAND; |
692 | goto out_unlock0; |
693 | } |
694 | |
695 | } |
696 | msq->q_lspid = task_tgid_vnr(current); |
697 | msq->q_stime = get_seconds(); |
698 | |
699 | if (!pipelined_send(msq, msg)) { |
700 | /* no one is waiting for this message, enqueue it */ |
701 | list_add_tail(&msg->m_list, &msq->q_messages); |
702 | msq->q_cbytes += msgsz; |
703 | msq->q_qnum++; |
704 | atomic_add(msgsz, &ns->msg_bytes); |
705 | atomic_inc(&ns->msg_hdrs); |
706 | } |
707 | |
708 | err = 0; |
709 | msg = NULL; |
710 | |
711 | out_unlock0: |
712 | ipc_unlock_object(&msq->q_perm); |
713 | out_unlock1: |
714 | rcu_read_unlock(); |
715 | if (msg != NULL) |
716 | free_msg(msg); |
717 | return err; |
718 | } |
719 | |
720 | SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, |
721 | int, msgflg) |
722 | { |
723 | long mtype; |
724 | |
725 | if (get_user(mtype, &msgp->mtype)) |
726 | return -EFAULT; |
727 | return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); |
728 | } |
729 | |
730 | static inline int convert_mode(long *msgtyp, int msgflg) |
731 | { |
732 | if (msgflg & MSG_COPY) |
733 | return SEARCH_NUMBER; |
734 | /* |
735 | * find message of correct type. |
736 | * msgtyp = 0 => get first. |
737 | * msgtyp > 0 => get first message of matching type. |
738 | * msgtyp < 0 => get the message with the lowest type that is <= abs(msgtyp). |
739 | */ |
740 | if (*msgtyp == 0) |
741 | return SEARCH_ANY; |
742 | if (*msgtyp < 0) { |
743 | *msgtyp = -*msgtyp; |
744 | return SEARCH_LESSEQUAL; |
745 | } |
746 | if (msgflg & MSG_EXCEPT) |
747 | return SEARCH_NOTEQUAL; |
748 | return SEARCH_EQUAL; |
749 | } |
750 | |
751 | static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) |
752 | { |
753 | struct msgbuf __user *msgp = dest; |
754 | size_t msgsz; |
755 | |
756 | if (put_user(msg->m_type, &msgp->mtype)) |
757 | return -EFAULT; |
758 | |
759 | msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; |
760 | if (store_msg(msgp->mtext, msg, msgsz)) |
761 | return -EFAULT; |
762 | return msgsz; |
763 | } |
764 | |
765 | #ifdef CONFIG_CHECKPOINT_RESTORE |
766 | /* |
767 | * This function creates new kernel message structure, large enough to store |
768 | * bufsz message bytes. |
769 | */ |
770 | static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) |
771 | { |
772 | struct msg_msg *copy; |
773 | |
774 | /* |
775 | * Create dummy message to copy real message to. |
776 | */ |
777 | copy = load_msg(buf, bufsz); |
778 | if (!IS_ERR(copy)) |
779 | copy->m_ts = bufsz; |
780 | return copy; |
781 | } |
782 | |
783 | static inline void free_copy(struct msg_msg *copy) |
784 | { |
785 | if (copy) |
786 | free_msg(copy); |
787 | } |
788 | #else |
789 | static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) |
790 | { |
791 | return ERR_PTR(-ENOSYS); |
792 | } |
793 | |
794 | static inline void free_copy(struct msg_msg *copy) |
795 | { |
796 | } |
797 | #endif |
798 | |
799 | static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) |
800 | { |
801 | struct msg_msg *msg, *found = NULL; |
802 | long count = 0; |
803 | |
804 | list_for_each_entry(msg, &msq->q_messages, m_list) { |
805 | if (testmsg(msg, *msgtyp, mode) && |
806 | !security_msg_queue_msgrcv(msq, msg, current, |
807 | *msgtyp, mode)) { |
808 | if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { |
809 | *msgtyp = msg->m_type - 1; |
810 | found = msg; |
811 | } else if (mode == SEARCH_NUMBER) { |
812 | if (*msgtyp == count) |
813 | return msg; |
814 | } else |
815 | return msg; |
816 | count++; |
817 | } |
818 | } |
819 | |
820 | return found ?: ERR_PTR(-EAGAIN); |
821 | } |
822 | |
823 | long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, |
824 | long (*msg_handler)(void __user *, struct msg_msg *, size_t)) |
825 | { |
826 | int mode; |
827 | struct msg_queue *msq; |
828 | struct ipc_namespace *ns; |
829 | struct msg_msg *msg, *copy = NULL; |
830 | |
831 | ns = current->nsproxy->ipc_ns; |
832 | |
833 | if (msqid < 0 || (long) bufsz < 0) |
834 | return -EINVAL; |
835 | |
836 | if (msgflg & MSG_COPY) { |
837 | if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT)) |
838 | return -EINVAL; |
839 | copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); |
840 | if (IS_ERR(copy)) |
841 | return PTR_ERR(copy); |
842 | } |
843 | mode = convert_mode(&msgtyp, msgflg); |
844 | |
845 | rcu_read_lock(); |
846 | msq = msq_obtain_object_check(ns, msqid); |
847 | if (IS_ERR(msq)) { |
848 | rcu_read_unlock(); |
849 | free_copy(copy); |
850 | return PTR_ERR(msq); |
851 | } |
852 | |
853 | for (;;) { |
854 | struct msg_receiver msr_d; |
855 | |
856 | msg = ERR_PTR(-EACCES); |
857 | if (ipcperms(ns, &msq->q_perm, S_IRUGO)) |
858 | goto out_unlock1; |
859 | |
860 | ipc_lock_object(&msq->q_perm); |
861 | |
862 | /* raced with RMID? */ |
863 | if (!ipc_valid_object(&msq->q_perm)) { |
864 | msg = ERR_PTR(-EIDRM); |
865 | goto out_unlock0; |
866 | } |
867 | |
868 | msg = find_msg(msq, &msgtyp, mode); |
869 | if (!IS_ERR(msg)) { |
870 | /* |
871 | * Found a suitable message. |
872 | * Unlink it from the queue. |
873 | */ |
874 | if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { |
875 | msg = ERR_PTR(-E2BIG); |
876 | goto out_unlock0; |
877 | } |
878 | /* |
879 | * If we are copying, then do not unlink message and do |
880 | * not update queue parameters. |
881 | */ |
882 | if (msgflg & MSG_COPY) { |
883 | msg = copy_msg(msg, copy); |
884 | goto out_unlock0; |
885 | } |
886 | |
887 | list_del(&msg->m_list); |
888 | msq->q_qnum--; |
889 | msq->q_rtime = get_seconds(); |
890 | msq->q_lrpid = task_tgid_vnr(current); |
891 | msq->q_cbytes -= msg->m_ts; |
892 | atomic_sub(msg->m_ts, &ns->msg_bytes); |
893 | atomic_dec(&ns->msg_hdrs); |
894 | ss_wakeup(&msq->q_senders, 0); |
895 | |
896 | goto out_unlock0; |
897 | } |
898 | |
899 | /* No message waiting. Wait for a message */ |
900 | if (msgflg & IPC_NOWAIT) { |
901 | msg = ERR_PTR(-ENOMSG); |
902 | goto out_unlock0; |
903 | } |
904 | |
905 | list_add_tail(&msr_d.r_list, &msq->q_receivers); |
906 | msr_d.r_tsk = current; |
907 | msr_d.r_msgtype = msgtyp; |
908 | msr_d.r_mode = mode; |
909 | if (msgflg & MSG_NOERROR) |
910 | msr_d.r_maxsize = INT_MAX; |
911 | else |
912 | msr_d.r_maxsize = bufsz; |
913 | msr_d.r_msg = ERR_PTR(-EAGAIN); |
914 | __set_current_state(TASK_INTERRUPTIBLE); |
915 | |
916 | ipc_unlock_object(&msq->q_perm); |
917 | rcu_read_unlock(); |
918 | schedule(); |
919 | |
920 | /* Lockless receive, part 1: |
921 | * Disable preemption. We don't hold a reference to the queue |
922 | * and getting a reference would defeat the idea of a lockless |
923 | * operation, thus the code relies on rcu to guarantee the |
924 | * existence of msq: |
925 | * Prior to destruction, expunge_all(-EIDRM) changes r_msg. |
926 | * Thus if r_msg is -EAGAIN, then the queue is not yet destroyed. |
927 | * rcu_read_lock() prevents preemption between reading r_msg |
928 | * and acquiring the q_perm.lock in ipc_lock_object(). |
929 | */ |
930 | rcu_read_lock(); |
931 | |
932 | /* Lockless receive, part 2: |
933 | * Wait until pipelined_send or expunge_all are outside of |
934 | * wake_up_process(). There is a race with exit(), see |
935 | * ipc/mqueue.c for the details. |
936 | */ |
937 | msg = (struct msg_msg *)msr_d.r_msg; |
938 | while (msg == NULL) { |
939 | cpu_relax(); |
940 | msg = (struct msg_msg *)msr_d.r_msg; |
941 | } |
942 | |
943 | /* Lockless receive, part 3: |
944 | * If there is a message or an error then accept it without |
945 | * locking. |
946 | */ |
947 | if (msg != ERR_PTR(-EAGAIN)) |
948 | goto out_unlock1; |
949 | |
950 | /* Lockless receive, part 4: |
951 | * Acquire the queue spinlock. |
952 | */ |
953 | ipc_lock_object(&msq->q_perm); |
954 | |
955 | /* Lockless receive, part 5: |
956 | * Repeat test after acquiring the spinlock. |
957 | */ |
958 | msg = (struct msg_msg *)msr_d.r_msg; |
959 | if (msg != ERR_PTR(-EAGAIN)) |
960 | goto out_unlock0; |
961 | |
962 | list_del(&msr_d.r_list); |
963 | if (signal_pending(current)) { |
964 | msg = ERR_PTR(-ERESTARTNOHAND); |
965 | goto out_unlock0; |
966 | } |
967 | |
968 | ipc_unlock_object(&msq->q_perm); |
969 | } |
970 | |
971 | out_unlock0: |
972 | ipc_unlock_object(&msq->q_perm); |
973 | out_unlock1: |
974 | rcu_read_unlock(); |
975 | if (IS_ERR(msg)) { |
976 | free_copy(copy); |
977 | return PTR_ERR(msg); |
978 | } |
979 | |
980 | bufsz = msg_handler(buf, msg, bufsz); |
981 | free_msg(msg); |
982 | |
983 | return bufsz; |
984 | } |
985 | |
986 | SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, |
987 | long, msgtyp, int, msgflg) |
988 | { |
989 | return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); |
990 | } |
991 | |
992 | /* |
993 | * Scale msgmni with the available lowmem size: the memory dedicated to msg |
994 | * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. |
995 | * Also take into account the number of nsproxies created so far. |
996 | * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. |
997 | */ |
998 | void recompute_msgmni(struct ipc_namespace *ns) |
999 | { |
1000 | struct sysinfo i; |
1001 | unsigned long allowed; |
1002 | int nb_ns; |
1003 | |
1004 | si_meminfo(&i); |
1005 | allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) |
1006 | / MSGMNB; |
1007 | nb_ns = atomic_read(&nr_ipc_ns); |
1008 | allowed /= nb_ns; |
1009 | |
1010 | if (allowed < MSGMNI) { |
1011 | ns->msg_ctlmni = MSGMNI; |
1012 | return; |
1013 | } |
1014 | |
1015 | if (allowed > IPCMNI / nb_ns) { |
1016 | ns->msg_ctlmni = IPCMNI / nb_ns; |
1017 | return; |
1018 | } |
1019 | |
1020 | ns->msg_ctlmni = allowed; |
1021 | } |
1022 | |
1023 | void msg_init_ns(struct ipc_namespace *ns) |
1024 | { |
1025 | ns->msg_ctlmax = MSGMAX; |
1026 | ns->msg_ctlmnb = MSGMNB; |
1027 | |
1028 | recompute_msgmni(ns); |
1029 | |
1030 | atomic_set(&ns->msg_bytes, 0); |
1031 | atomic_set(&ns->msg_hdrs, 0); |
1032 | ipc_init_ids(&ns->ids[IPC_MSG_IDS]); |
1033 | } |
1034 | |
1035 | #ifdef CONFIG_IPC_NS |
1036 | void msg_exit_ns(struct ipc_namespace *ns) |
1037 | { |
1038 | free_ipcs(ns, &msg_ids(ns), freeque); |
1039 | idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); |
1040 | } |
1041 | #endif |
1042 | |
1043 | #ifdef CONFIG_PROC_FS |
1044 | static int sysvipc_msg_proc_show(struct seq_file *s, void *it) |
1045 | { |
1046 | struct user_namespace *user_ns = seq_user_ns(s); |
1047 | struct msg_queue *msq = it; |
1048 | |
1049 | return seq_printf(s, |
1050 | "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", |
1051 | msq->q_perm.key, |
1052 | msq->q_perm.id, |
1053 | msq->q_perm.mode, |
1054 | msq->q_cbytes, |
1055 | msq->q_qnum, |
1056 | msq->q_lspid, |
1057 | msq->q_lrpid, |
1058 | from_kuid_munged(user_ns, msq->q_perm.uid), |
1059 | from_kgid_munged(user_ns, msq->q_perm.gid), |
1060 | from_kuid_munged(user_ns, msq->q_perm.cuid), |
1061 | from_kgid_munged(user_ns, msq->q_perm.cgid), |
1062 | msq->q_stime, |
1063 | msq->q_rtime, |
1064 | msq->q_ctime); |
1065 | } |
1066 | #endif |
1067 | |
1068 | void __init msg_init(void) |
1069 | { |
1070 | msg_init_ns(&init_ipc_ns); |
1071 | |
1072 | printk(KERN_INFO "msgmni has been set to %d\n", |
1073 | init_ipc_ns.msg_ctlmni); |
1074 | |
1075 | ipc_init_proc_interface("sysvipc/msg", |
1076 | " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", |
1077 | IPC_MSG_IDS, sysvipc_msg_proc_show); |
1078 | } |
1079 |
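The listing above is the kernel side of the System V message queue syscalls. As a quick orientation, here is a hedged userspace sketch of the same API; it is an illustration, not part of ipc/msg.c: the struct name, key choice, permissions, and message text are arbitrary, and only the standard glibc wrappers declared in <sys/ipc.h> and <sys/msg.h> are used. The comments point at the kernel entry points above that each call reaches.

/*
 * Userspace sketch (illustration only): exercise the syscalls implemented
 * in ipc/msg.c above.  Names and values here are arbitrary examples.
 */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>

struct my_msgbuf {
	long mtype;		/* must be > 0; do_msgsnd() rejects mtype < 1 */
	char mtext[128];	/* payload; do_msgsnd() caps it at msg_ctlmax */
};

int main(void)
{
	struct my_msgbuf sbuf = { .mtype = 1 }, rbuf;
	int msqid;

	/* msgget() -> ipcget() -> newque() for a new queue */
	msqid = msgget(IPC_PRIVATE, IPC_CREAT | 0600);
	if (msqid < 0) {
		perror("msgget");
		return 1;
	}

	strcpy(sbuf.mtext, "hello");
	/* msgsnd() -> do_msgsnd(); msgsz counts mtext only, not mtype */
	if (msgsnd(msqid, &sbuf, strlen(sbuf.mtext) + 1, 0) < 0)
		perror("msgsnd");

	/* msgrcv() -> do_msgrcv(); msgtyp == 1 selects SEARCH_EQUAL */
	if (msgrcv(msqid, &rbuf, sizeof(rbuf.mtext), 1, 0) < 0)
		perror("msgrcv");
	else
		printf("received: %s\n", rbuf.mtext);

	/* msgctl(IPC_RMID) -> msgctl_down() -> freeque() */
	if (msgctl(msqid, IPC_RMID, NULL) < 0)
		perror("msgctl(IPC_RMID)");

	return 0;
}

A process that only wants to inspect a queue can instead call msgctl() with IPC_STAT, which lands in msgctl_nolock() above and copies the msqid64_ds snapshot back to userspace.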