Root/ipc/sem.c

1/*
2 * linux/ipc/sem.c
3 * Copyright (C) 1992 Krishna Balasubramanian
4 * Copyright (C) 1995 Eric Schenk, Bruno Haible
5 *
6 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
7 *
8 * SMP-threaded, sysctl's added
9 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
10 * Enforced range limit on SEM_UNDO
11 * (c) 2001 Red Hat Inc
12 * Lockless wakeup
13 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
14 * Further wakeup optimizations, documentation
15 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
16 *
17 * support for audit of ipc object properties and permission changes
18 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
19 *
20 * namespaces support
21 * OpenVZ, SWsoft Inc.
22 * Pavel Emelianov <xemul@openvz.org>
23 *
24 * Implementation notes: (May 2010)
25 * This file implements System V semaphores.
26 *
27 * User space visible behavior:
28 * - FIFO ordering for semop() operations (just FIFO, not starvation
29 * protection)
30 * - multiple semaphore operations that alter the same semaphore in
31 * one semop() are handled.
32 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
33 * SETALL calls.
34 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
35 * - undo adjustments at process exit are limited to 0..SEMVMX.
36 * - namespaces are supported.
37 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
38 * to /proc/sys/kernel/sem.
39 * - statistics about the usage are reported in /proc/sysvipc/sem.
40 *
41 * Internals:
42 * - scalability:
43 * - all global variables are read-mostly.
44 * - semop() calls and semctl(RMID) are synchronized by RCU.
45 * - most operations do write operations (actually: spin_lock calls) to
46 * the per-semaphore array structure.
47 * Thus: Perfect SMP scaling between independent semaphore arrays.
48 * If multiple semaphores in one array are used, then cache line
49 * thrashing on the semaphore array spinlock will limit the scaling.
50 * - semncnt and semzcnt are calculated on demand in count_semncnt() and
51 * count_semzcnt()
52 * - the task that performs a successful semop() scans the list of all
53 * sleeping tasks and completes any pending operations that can be fulfilled.
54 * Semaphores are actively given to waiting tasks (necessary for FIFO).
55 * (see update_queue())
56 * - To improve the scalability, the actual wake-up calls are performed after
57 * dropping all locks. (see wake_up_sem_queue_prepare(),
58 * wake_up_sem_queue_do())
59 * - All work is done by the waker, the woken up task does not have to do
60 * anything - not even acquiring a lock or dropping a refcount.
61 * - A woken up task may not even touch the semaphore array anymore, it may
62 * have been destroyed already by a semctl(RMID).
63 * - The synchronizations between wake-ups due to a timeout/signal and a
64 * wake-up due to a completed semaphore operation is achieved by using an
65 * intermediate state (IN_WAKEUP).
66 * - UNDO values are stored in an array (one per process and per
67 * semaphore array, lazily allocated). For backwards compatibility, multiple
68 * modes for the UNDO variables are supported (per process, per thread)
69 * (see copy_semundo, CLONE_SYSVSEM)
70 * - There are two lists of the pending operations: a per-array list
71 * and per-semaphore list (stored in the array). This allows to achieve FIFO
72 * ordering without always scanning all pending operations.
73 * The worst-case behavior is nevertheless O(N^2) for N wakeups.
74 */
75
76#include <linux/slab.h>
77#include <linux/spinlock.h>
78#include <linux/init.h>
79#include <linux/proc_fs.h>
80#include <linux/time.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/audit.h>
84#include <linux/capability.h>
85#include <linux/seq_file.h>
86#include <linux/rwsem.h>
87#include <linux/nsproxy.h>
88#include <linux/ipc_namespace.h>
89
90#include <asm/uaccess.h>
91#include "util.h"
92
93/* One semaphore structure for each semaphore in the system. */
94struct sem {
95    int semval; /* current value */
96    int sempid; /* pid of last operation */
97    struct list_head sem_pending; /* pending single-sop operations */
98};
99
100/* One queue for each sleeping process in the system. */
101struct sem_queue {
102    struct list_head simple_list; /* queue of pending operations */
103    struct list_head list; /* queue of pending operations */
104    struct task_struct *sleeper; /* this process */
105    struct sem_undo *undo; /* undo structure */
106    int pid; /* process id of requesting process */
107    int status; /* completion status of operation */
108    struct sembuf *sops; /* array of pending operations */
109    int nsops; /* number of operations */
110    int alter; /* does *sops alter the array? */
111};
112
113/* Each task has a list of undo requests. They are executed automatically
114 * when the process exits.
115 */
116struct sem_undo {
117    struct list_head list_proc; /* per-process list: *
118                         * all undos from one process
119                         * rcu protected */
120    struct rcu_head rcu; /* rcu struct for sem_undo */
121    struct sem_undo_list *ulp; /* back ptr to sem_undo_list */
122    struct list_head list_id; /* per semaphore array list:
123                         * all undos for one array */
124    int semid; /* semaphore set identifier */
125    short *semadj; /* array of adjustments */
126                        /* one per semaphore */
127};
128
129/* sem_undo_list controls shared access to the list of sem_undo structures
130 * that may be shared among all a CLONE_SYSVSEM task group.
131 */
132struct sem_undo_list {
133    atomic_t refcnt;
134    spinlock_t lock;
135    struct list_head list_proc;
136};
137
138
/* The semaphore slot of the per-namespace ipc id tables. */
#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])

/*
 * Thin wrappers that forward to the generic ipc helpers using the
 * kern_ipc_perm embedded in the semaphore array. The @sma argument is
 * parenthesized so that any pointer expression can be passed, e.g.
 * sem_checkid(base + 1, id).
 */
#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
#define sem_checkid(sma, semid) ipc_checkid(&(sma)->sem_perm, semid)
143
/* Forward declarations for functions that are referenced before their definition. */
static int newary(struct ipc_namespace *ns, struct ipc_params *params);
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif
149
/*
 * Sizes of the on-stack fast-path buffers.
 * SEMMSL_FAST bounds the ushort value buffer in semctl_main()
 * (256 entries, i.e. 512 bytes on the stack).
 */
#define SEMMSL_FAST 256
/* Fast-path limit for the number of operations (~ 372 bytes on stack). */
#define SEMOPM_FAST 64
152
153/*
154 * linked list protection:
155 * sem_undo.list_id,
156 * sem_array.sem_pending,
157 * sem_array.list_id: sem_lock() for read/write
158 * sem_undo.list_proc: modified under sem_undo_list.lock; rcu protected.
159 *
160 */
161
/*
 * Named aliases for the four slots of the per-namespace sem_ctls[] array,
 * in the order SEMMSL, SEMMNS, SEMOPM, SEMMNI.
 */
#define sc_semmsl sem_ctls[0]
#define sc_semmns sem_ctls[1]
#define sc_semopm sem_ctls[2]
#define sc_semmni sem_ctls[3]
166
167void sem_init_ns(struct ipc_namespace *ns)
168{
169    ns->sc_semmsl = SEMMSL;
170    ns->sc_semmns = SEMMNS;
171    ns->sc_semopm = SEMOPM;
172    ns->sc_semmni = SEMMNI;
173    ns->used_sems = 0;
174    ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
175}
176
#ifdef CONFIG_IPC_NS
/*
 * Tear down the semaphore part of an ipc namespace: hand every remaining
 * array to freeary() via free_ipcs(), then destroy the idr.
 */
void sem_exit_ns(struct ipc_namespace *ns)
{
    free_ipcs(ns, &ns->ids[IPC_SEM_IDS], freeary);
    idr_destroy(&sem_ids(ns).ipcs_idr);
}
#endif
184
185void __init sem_init (void)
186{
187    sem_init_ns(&init_ipc_ns);
188    ipc_init_proc_interface("sysvipc/sem",
189                " key semid perms nsems uid gid cuid cgid otime ctime\n",
190                IPC_SEM_IDS, sysvipc_sem_proc_show);
191}
192
193/*
194 * sem_lock_(check_) routines are called in the paths where the rw_mutex
195 * is not held.
196 */
197static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
198{
199    struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);
200
201    if (IS_ERR(ipcp))
202        return (struct sem_array *)ipcp;
203
204    return container_of(ipcp, struct sem_array, sem_perm);
205}
206
207static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
208                        int id)
209{
210    struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
211
212    if (IS_ERR(ipcp))
213        return (struct sem_array *)ipcp;
214
215    return container_of(ipcp, struct sem_array, sem_perm);
216}
217
218static inline void sem_lock_and_putref(struct sem_array *sma)
219{
220    ipc_lock_by_ptr(&sma->sem_perm);
221    ipc_rcu_putref(sma);
222}
223
/*
 * Take an rcu reference on @sma, then release its lock. Used before
 * operations that must not run with the array locked (see the
 * allocations and user copies in semctl_main()).
 */
static inline void sem_getref_and_unlock(struct sem_array *sma)
{
    ipc_rcu_getref(sma);
    sem_unlock(sma);
}
229
/*
 * Drop an rcu reference on @sma. The array is locked around the
 * ipc_rcu_putref() call and unlocked again afterwards.
 */
static inline void sem_putref(struct sem_array *sma)
{
    sem_lock_and_putref(sma);
    sem_unlock(sma);
}
236
237static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
238{
239    ipc_rmid(&sem_ids(ns), &s->sem_perm);
240}
241
242/*
243 * Lockless wakeup algorithm:
244 * Without the check/retry algorithm a lockless wakeup is possible:
245 * - queue.status is initialized to -EINTR before blocking.
246 * - wakeup is performed by
247 * * unlinking the queue entry from sma->sem_pending
248 * * setting queue.status to IN_WAKEUP
249 * This is the notification for the blocked thread that a
250 * result value is imminent.
251 * * call wake_up_process
252 * * set queue.status to the final value.
253 * - the previously blocked thread checks queue.status:
254 * * if it's IN_WAKEUP, then it must wait until the value changes
255 * * if it's not -EINTR, then the operation was completed by
256 * update_queue. semtimedop can return queue.status without
257 * performing any operation on the sem array.
258 * * otherwise it must acquire the spinlock and check what's up.
259 *
260 * The two-stage algorithm is necessary to protect against the following
261 * races:
262 * - if queue.status is set after wake_up_process, then the woken up idle
263 * thread could race forward and try (and fail) to acquire sma->lock
264 * before update_queue had a chance to set queue.status
265 * - if queue.status is written before wake_up_process and if the
266 * blocked process is woken up by a signal between writing
267 * queue.status and the wake_up_process, then the woken up
268 * process could return from semtimedop and die by calling
269 * sys_exit before wake_up_process is called. Then wake_up_process
270 * will oops, because the task structure is already invalid.
271 * (yes, this happened on s390 with sysv msg).
272 *
273 */
/*
 * Transient sem_queue.status value: written by
 * wake_up_sem_queue_prepare() and replaced by the final result in
 * wake_up_sem_queue_do().
 */
#define IN_WAKEUP 1
275
276/**
277 * newary - Create a new semaphore set
278 * @ns: namespace
279 * @params: ptr to the structure that contains key, semflg and nsems
280 *
281 * Called with sem_ids.rw_mutex held (as a writer)
282 */
283
284static int newary(struct ipc_namespace *ns, struct ipc_params *params)
285{
286    int id;
287    int retval;
288    struct sem_array *sma;
289    int size;
290    key_t key = params->key;
291    int nsems = params->u.nsems;
292    int semflg = params->flg;
293    int i;
294
295    if (!nsems)
296        return -EINVAL;
297    if (ns->used_sems + nsems > ns->sc_semmns)
298        return -ENOSPC;
299
300    size = sizeof (*sma) + nsems * sizeof (struct sem);
301    sma = ipc_rcu_alloc(size);
302    if (!sma) {
303        return -ENOMEM;
304    }
305    memset (sma, 0, size);
306
307    sma->sem_perm.mode = (semflg & S_IRWXUGO);
308    sma->sem_perm.key = key;
309
310    sma->sem_perm.security = NULL;
311    retval = security_sem_alloc(sma);
312    if (retval) {
313        ipc_rcu_putref(sma);
314        return retval;
315    }
316
317    id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
318    if (id < 0) {
319        security_sem_free(sma);
320        ipc_rcu_putref(sma);
321        return id;
322    }
323    ns->used_sems += nsems;
324
325    sma->sem_base = (struct sem *) &sma[1];
326
327    for (i = 0; i < nsems; i++)
328        INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
329
330    sma->complex_count = 0;
331    INIT_LIST_HEAD(&sma->sem_pending);
332    INIT_LIST_HEAD(&sma->list_id);
333    sma->sem_nsems = nsems;
334    sma->sem_ctime = get_seconds();
335    sem_unlock(sma);
336
337    return sma->sem_perm.id;
338}
339
340
341/*
342 * Called with sem_ids.rw_mutex and ipcp locked.
343 */
344static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
345{
346    struct sem_array *sma;
347
348    sma = container_of(ipcp, struct sem_array, sem_perm);
349    return security_sem_associate(sma, semflg);
350}
351
352/*
353 * Called with sem_ids.rw_mutex and ipcp locked.
354 */
355static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
356                struct ipc_params *params)
357{
358    struct sem_array *sma;
359
360    sma = container_of(ipcp, struct sem_array, sem_perm);
361    if (params->u.nsems > sma->sem_nsems)
362        return -EINVAL;
363
364    return 0;
365}
366
367SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
368{
369    struct ipc_namespace *ns;
370    struct ipc_ops sem_ops;
371    struct ipc_params sem_params;
372
373    ns = current->nsproxy->ipc_ns;
374
375    if (nsems < 0 || nsems > ns->sc_semmsl)
376        return -EINVAL;
377
378    sem_ops.getnew = newary;
379    sem_ops.associate = sem_security;
380    sem_ops.more_checks = sem_more_checks;
381
382    sem_params.key = key;
383    sem_params.flg = semflg;
384    sem_params.u.nsems = nsems;
385
386    return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
387}
388
389/*
390 * Determine whether a sequence of semaphore operations would succeed
391 * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
392 */
393
394static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
395                 int nsops, struct sem_undo *un, int pid)
396{
397    int result, sem_op;
398    struct sembuf *sop;
399    struct sem * curr;
400
401    for (sop = sops; sop < sops + nsops; sop++) {
402        curr = sma->sem_base + sop->sem_num;
403        sem_op = sop->sem_op;
404        result = curr->semval;
405  
406        if (!sem_op && result)
407            goto would_block;
408
409        result += sem_op;
410        if (result < 0)
411            goto would_block;
412        if (result > SEMVMX)
413            goto out_of_range;
414        if (sop->sem_flg & SEM_UNDO) {
415            int undo = un->semadj[sop->sem_num] - sem_op;
416            /*
417              * Exceeding the undo range is an error.
418             */
419            if (undo < (-SEMAEM - 1) || undo > SEMAEM)
420                goto out_of_range;
421        }
422        curr->semval = result;
423    }
424
425    sop--;
426    while (sop >= sops) {
427        sma->sem_base[sop->sem_num].sempid = pid;
428        if (sop->sem_flg & SEM_UNDO)
429            un->semadj[sop->sem_num] -= sop->sem_op;
430        sop--;
431    }
432    
433    return 0;
434
435out_of_range:
436    result = -ERANGE;
437    goto undo;
438
439would_block:
440    if (sop->sem_flg & IPC_NOWAIT)
441        result = -EAGAIN;
442    else
443        result = 1;
444
445undo:
446    sop--;
447    while (sop >= sops) {
448        sma->sem_base[sop->sem_num].semval -= sop->sem_op;
449        sop--;
450    }
451
452    return result;
453}
454
455/** wake_up_sem_queue_prepare(q, error): Prepare wake-up
456 * @q: queue entry that must be signaled
457 * @error: Error value for the signal
458 *
459 * Prepare the wake-up of the queue entry q.
460 */
461static void wake_up_sem_queue_prepare(struct list_head *pt,
462                struct sem_queue *q, int error)
463{
464    if (list_empty(pt)) {
465        /*
466         * Hold preempt off so that we don't get preempted and have the
467         * wakee busy-wait until we're scheduled back on.
468         */
469        preempt_disable();
470    }
471    q->status = IN_WAKEUP;
472    q->pid = error;
473
474    list_add_tail(&q->simple_list, pt);
475}
476
477/**
478 * wake_up_sem_queue_do(pt) - do the actual wake-up
479 * @pt: list of tasks to be woken up
480 *
481 * Do the actual wake-up.
482 * The function is called without any locks held, thus the semaphore array
483 * could be destroyed already and the tasks can disappear as soon as the
484 * status is set to the actual return code.
485 */
486static void wake_up_sem_queue_do(struct list_head *pt)
487{
488    struct sem_queue *q, *t;
489    int did_something;
490
491    did_something = !list_empty(pt);
492    list_for_each_entry_safe(q, t, pt, simple_list) {
493        wake_up_process(q->sleeper);
494        /* q can disappear immediately after writing q->status. */
495        smp_wmb();
496        q->status = q->pid;
497    }
498    if (did_something)
499        preempt_enable();
500}
501
502static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
503{
504    list_del(&q->list);
505    if (q->nsops == 1)
506        list_del(&q->simple_list);
507    else
508        sma->complex_count--;
509}
510
511/** check_restart(sma, q)
512 * @sma: semaphore array
513 * @q: the operation that just completed
514 *
515 * update_queue is O(N^2) when it restarts scanning the whole queue of
516 * waiting operations. Therefore this function checks if the restart is
517 * really necessary. It is called after a previously waiting operation
518 * was completed.
519 */
520static int check_restart(struct sem_array *sma, struct sem_queue *q)
521{
522    struct sem *curr;
523    struct sem_queue *h;
524
525    /* if the operation didn't modify the array, then no restart */
526    if (q->alter == 0)
527        return 0;
528
529    /* pending complex operations are too difficult to analyse */
530    if (sma->complex_count)
531        return 1;
532
533    /* we were a sleeping complex operation. Too difficult */
534    if (q->nsops > 1)
535        return 1;
536
537    curr = sma->sem_base + q->sops[0].sem_num;
538
539    /* No-one waits on this queue */
540    if (list_empty(&curr->sem_pending))
541        return 0;
542
543    /* the new semaphore value */
544    if (curr->semval) {
545        /* It is impossible that someone waits for the new value:
546         * - q is a previously sleeping simple operation that
547         * altered the array. It must be a decrement, because
548         * simple increments never sleep.
549         * - The value is not 0, thus wait-for-zero won't proceed.
550         * - If there are older (higher priority) decrements
551         * in the queue, then they have observed the original
552         * semval value and couldn't proceed. The operation
553         * decremented to value - thus they won't proceed either.
554         */
555        BUG_ON(q->sops[0].sem_op >= 0);
556        return 0;
557    }
558    /*
559     * semval is 0. Check if there are wait-for-zero semops.
560     * They must be the first entries in the per-semaphore simple queue
561     */
562    h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list);
563    BUG_ON(h->nsops != 1);
564    BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);
565
566    /* Yes, there is a wait-for-zero semop. Restart */
567    if (h->sops[0].sem_op == 0)
568        return 1;
569
570    /* Again - no-one is waiting for the new value. */
571    return 0;
572}
573
574
575/**
576 * update_queue(sma, semnum): Look for tasks that can be completed.
577 * @sma: semaphore array.
578 * @semnum: semaphore that was modified.
579 * @pt: list head for the tasks that must be woken up.
580 *
581 * update_queue must be called after a semaphore in a semaphore array
582 * was modified. If multiple semaphore were modified, then @semnum
583 * must be set to -1.
584 * The tasks that must be woken up are added to @pt. The return code
585 * is stored in q->pid.
586 * The function return 1 if at least one semop was completed successfully.
587 */
588static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
589{
590    struct sem_queue *q;
591    struct list_head *walk;
592    struct list_head *pending_list;
593    int offset;
594    int semop_completed = 0;
595
596    /* if there are complex operations around, then knowing the semaphore
597     * that was modified doesn't help us. Assume that multiple semaphores
598     * were modified.
599     */
600    if (sma->complex_count)
601        semnum = -1;
602
603    if (semnum == -1) {
604        pending_list = &sma->sem_pending;
605        offset = offsetof(struct sem_queue, list);
606    } else {
607        pending_list = &sma->sem_base[semnum].sem_pending;
608        offset = offsetof(struct sem_queue, simple_list);
609    }
610
611again:
612    walk = pending_list->next;
613    while (walk != pending_list) {
614        int error, restart;
615
616        q = (struct sem_queue *)((char *)walk - offset);
617        walk = walk->next;
618
619        /* If we are scanning the single sop, per-semaphore list of
620         * one semaphore and that semaphore is 0, then it is not
621         * necessary to scan the "alter" entries: simple increments
622         * that affect only one entry succeed immediately and cannot
623         * be in the per semaphore pending queue, and decrements
624         * cannot be successful if the value is already 0.
625         */
626        if (semnum != -1 && sma->sem_base[semnum].semval == 0 &&
627                q->alter)
628            break;
629
630        error = try_atomic_semop(sma, q->sops, q->nsops,
631                     q->undo, q->pid);
632
633        /* Does q->sleeper still need to sleep? */
634        if (error > 0)
635            continue;
636
637        unlink_queue(sma, q);
638
639        if (error) {
640            restart = 0;
641        } else {
642            semop_completed = 1;
643            restart = check_restart(sma, q);
644        }
645
646        wake_up_sem_queue_prepare(pt, q, error);
647        if (restart)
648            goto again;
649    }
650    return semop_completed;
651}
652
653/**
654 * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
655 * @sma: semaphore array
656 * @sops: operations that were performed
657 * @nsops: number of operations
658 * @otime: force setting otime
659 * @pt: list head of the tasks that must be woken up.
660 *
661 * do_smart_update() does the required called to update_queue, based on the
662 * actual changes that were performed on the semaphore array.
663 * Note that the function does not do the actual wake-up: the caller is
664 * responsible for calling wake_up_sem_queue_do(@pt).
665 * It is safe to perform this call after dropping all locks.
666 */
667static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
668            int otime, struct list_head *pt)
669{
670    int i;
671
672    if (sma->complex_count || sops == NULL) {
673        if (update_queue(sma, -1, pt))
674            otime = 1;
675        goto done;
676    }
677
678    for (i = 0; i < nsops; i++) {
679        if (sops[i].sem_op > 0 ||
680            (sops[i].sem_op < 0 &&
681                sma->sem_base[sops[i].sem_num].semval == 0))
682            if (update_queue(sma, sops[i].sem_num, pt))
683                otime = 1;
684    }
685done:
686    if (otime)
687        sma->sem_otime = get_seconds();
688}
689
690
691/* The following counts are associated to each semaphore:
692 * semncnt number of tasks waiting on semval being nonzero
693 * semzcnt number of tasks waiting on semval being zero
694 * This model assumes that a task waits on exactly one semaphore.
695 * Since semaphore operations are to be performed atomically, tasks actually
696 * wait on a whole sequence of semaphores simultaneously.
697 * The counts we return here are a rough approximation, but still
698 * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
699 */
700static int count_semncnt (struct sem_array * sma, ushort semnum)
701{
702    int semncnt;
703    struct sem_queue * q;
704
705    semncnt = 0;
706    list_for_each_entry(q, &sma->sem_pending, list) {
707        struct sembuf * sops = q->sops;
708        int nsops = q->nsops;
709        int i;
710        for (i = 0; i < nsops; i++)
711            if (sops[i].sem_num == semnum
712                && (sops[i].sem_op < 0)
713                && !(sops[i].sem_flg & IPC_NOWAIT))
714                semncnt++;
715    }
716    return semncnt;
717}
718
719static int count_semzcnt (struct sem_array * sma, ushort semnum)
720{
721    int semzcnt;
722    struct sem_queue * q;
723
724    semzcnt = 0;
725    list_for_each_entry(q, &sma->sem_pending, list) {
726        struct sembuf * sops = q->sops;
727        int nsops = q->nsops;
728        int i;
729        for (i = 0; i < nsops; i++)
730            if (sops[i].sem_num == semnum
731                && (sops[i].sem_op == 0)
732                && !(sops[i].sem_flg & IPC_NOWAIT))
733                semzcnt++;
734    }
735    return semzcnt;
736}
737
738/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
739 * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
740 * remains locked on exit.
741 */
742static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
743{
744    struct sem_undo *un, *tu;
745    struct sem_queue *q, *tq;
746    struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
747    struct list_head tasks;
748
749    /* Free the existing undo structures for this semaphore set. */
750    assert_spin_locked(&sma->sem_perm.lock);
751    list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
752        list_del(&un->list_id);
753        spin_lock(&un->ulp->lock);
754        un->semid = -1;
755        list_del_rcu(&un->list_proc);
756        spin_unlock(&un->ulp->lock);
757        kfree_rcu(un, rcu);
758    }
759
760    /* Wake up all pending processes and let them fail with EIDRM. */
761    INIT_LIST_HEAD(&tasks);
762    list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
763        unlink_queue(sma, q);
764        wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
765    }
766
767    /* Remove the semaphore set from the IDR */
768    sem_rmid(ns, sma);
769    sem_unlock(sma);
770
771    wake_up_sem_queue_do(&tasks);
772    ns->used_sems -= sma->sem_nsems;
773    security_sem_free(sma);
774    ipc_rcu_putref(sma);
775}
776
777static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
778{
779    switch(version) {
780    case IPC_64:
781        return copy_to_user(buf, in, sizeof(*in));
782    case IPC_OLD:
783        {
784        struct semid_ds out;
785
786        memset(&out, 0, sizeof(out));
787
788        ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
789
790        out.sem_otime = in->sem_otime;
791        out.sem_ctime = in->sem_ctime;
792        out.sem_nsems = in->sem_nsems;
793
794        return copy_to_user(buf, &out, sizeof(out));
795        }
796    default:
797        return -EINVAL;
798    }
799}
800
801static int semctl_nolock(struct ipc_namespace *ns, int semid,
802             int cmd, int version, union semun arg)
803{
804    int err;
805    struct sem_array *sma;
806
807    switch(cmd) {
808    case IPC_INFO:
809    case SEM_INFO:
810    {
811        struct seminfo seminfo;
812        int max_id;
813
814        err = security_sem_semctl(NULL, cmd);
815        if (err)
816            return err;
817        
818        memset(&seminfo,0,sizeof(seminfo));
819        seminfo.semmni = ns->sc_semmni;
820        seminfo.semmns = ns->sc_semmns;
821        seminfo.semmsl = ns->sc_semmsl;
822        seminfo.semopm = ns->sc_semopm;
823        seminfo.semvmx = SEMVMX;
824        seminfo.semmnu = SEMMNU;
825        seminfo.semmap = SEMMAP;
826        seminfo.semume = SEMUME;
827        down_read(&sem_ids(ns).rw_mutex);
828        if (cmd == SEM_INFO) {
829            seminfo.semusz = sem_ids(ns).in_use;
830            seminfo.semaem = ns->used_sems;
831        } else {
832            seminfo.semusz = SEMUSZ;
833            seminfo.semaem = SEMAEM;
834        }
835        max_id = ipc_get_maxid(&sem_ids(ns));
836        up_read(&sem_ids(ns).rw_mutex);
837        if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
838            return -EFAULT;
839        return (max_id < 0) ? 0: max_id;
840    }
841    case IPC_STAT:
842    case SEM_STAT:
843    {
844        struct semid64_ds tbuf;
845        int id;
846
847        if (cmd == SEM_STAT) {
848            sma = sem_lock(ns, semid);
849            if (IS_ERR(sma))
850                return PTR_ERR(sma);
851            id = sma->sem_perm.id;
852        } else {
853            sma = sem_lock_check(ns, semid);
854            if (IS_ERR(sma))
855                return PTR_ERR(sma);
856            id = 0;
857        }
858
859        err = -EACCES;
860        if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
861            goto out_unlock;
862
863        err = security_sem_semctl(sma, cmd);
864        if (err)
865            goto out_unlock;
866
867        memset(&tbuf, 0, sizeof(tbuf));
868
869        kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
870        tbuf.sem_otime = sma->sem_otime;
871        tbuf.sem_ctime = sma->sem_ctime;
872        tbuf.sem_nsems = sma->sem_nsems;
873        sem_unlock(sma);
874        if (copy_semid_to_user (arg.buf, &tbuf, version))
875            return -EFAULT;
876        return id;
877    }
878    default:
879        return -EINVAL;
880    }
881out_unlock:
882    sem_unlock(sma);
883    return err;
884}
885
886static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
887        int cmd, int version, union semun arg)
888{
889    struct sem_array *sma;
890    struct sem* curr;
891    int err;
892    ushort fast_sem_io[SEMMSL_FAST];
893    ushort* sem_io = fast_sem_io;
894    int nsems;
895    struct list_head tasks;
896
897    sma = sem_lock_check(ns, semid);
898    if (IS_ERR(sma))
899        return PTR_ERR(sma);
900
901    INIT_LIST_HEAD(&tasks);
902    nsems = sma->sem_nsems;
903
904    err = -EACCES;
905    if (ipcperms(ns, &sma->sem_perm,
906            (cmd == SETVAL || cmd == SETALL) ? S_IWUGO : S_IRUGO))
907        goto out_unlock;
908
909    err = security_sem_semctl(sma, cmd);
910    if (err)
911        goto out_unlock;
912
913    err = -EACCES;
914    switch (cmd) {
915    case GETALL:
916    {
917        ushort __user *array = arg.array;
918        int i;
919
920        if(nsems > SEMMSL_FAST) {
921            sem_getref_and_unlock(sma);
922
923            sem_io = ipc_alloc(sizeof(ushort)*nsems);
924            if(sem_io == NULL) {
925                sem_putref(sma);
926                return -ENOMEM;
927            }
928
929            sem_lock_and_putref(sma);
930            if (sma->sem_perm.deleted) {
931                sem_unlock(sma);
932                err = -EIDRM;
933                goto out_free;
934            }
935        }
936
937        for (i = 0; i < sma->sem_nsems; i++)
938            sem_io[i] = sma->sem_base[i].semval;
939        sem_unlock(sma);
940        err = 0;
941        if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
942            err = -EFAULT;
943        goto out_free;
944    }
945    case SETALL:
946    {
947        int i;
948        struct sem_undo *un;
949
950        sem_getref_and_unlock(sma);
951
952        if(nsems > SEMMSL_FAST) {
953            sem_io = ipc_alloc(sizeof(ushort)*nsems);
954            if(sem_io == NULL) {
955                sem_putref(sma);
956                return -ENOMEM;
957            }
958        }
959
960        if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) {
961            sem_putref(sma);
962            err = -EFAULT;
963            goto out_free;
964        }
965
966        for (i = 0; i < nsems; i++) {
967            if (sem_io[i] > SEMVMX) {
968                sem_putref(sma);
969                err = -ERANGE;
970                goto out_free;
971            }
972        }
973        sem_lock_and_putref(sma);
974        if (sma->sem_perm.deleted) {
975            sem_unlock(sma);
976            err = -EIDRM;
977            goto out_free;
978        }
979
980        for (i = 0; i < nsems; i++)
981            sma->sem_base[i].semval = sem_io[i];
982
983        assert_spin_locked(&sma->sem_perm.lock);
984        list_for_each_entry(un, &sma->list_id, list_id) {
985            for (i = 0; i < nsems; i++)
986                un->semadj[i] = 0;
987        }
988        sma->sem_ctime = get_seconds();
989        /* maybe some queued-up processes were waiting for this */
990        do_smart_update(sma, NULL, 0, 0, &tasks);
991        err = 0;
992        goto out_unlock;
993    }
994    /* GETVAL, GETPID, GETNCTN, GETZCNT, SETVAL: fall-through */
995    }
996    err = -EINVAL;
997    if(semnum < 0 || semnum >= nsems)
998        goto out_unlock;
999
1000    curr = &sma->sem_base[semnum];
1001
1002    switch (cmd) {
1003    case GETVAL:
1004        err = curr->semval;
1005        goto out_unlock;
1006    case GETPID:
1007        err = curr->sempid;
1008        goto out_unlock;
1009    case GETNCNT:
1010        err = count_semncnt(sma,semnum);
1011        goto out_unlock;
1012    case GETZCNT:
1013        err = count_semzcnt(sma,semnum);
1014        goto out_unlock;
1015    case SETVAL:
1016    {
1017        int val = arg.val;
1018        struct sem_undo *un;
1019
1020        err = -ERANGE;
1021        if (val > SEMVMX || val < 0)
1022            goto out_unlock;
1023
1024        assert_spin_locked(&sma->sem_perm.lock);
1025        list_for_each_entry(un, &sma->list_id, list_id)
1026            un->semadj[semnum] = 0;
1027
1028        curr->semval = val;
1029        curr->sempid = task_tgid_vnr(current);
1030        sma->sem_ctime = get_seconds();
1031        /* maybe some queued-up processes were waiting for this */
1032        do_smart_update(sma, NULL, 0, 0, &tasks);
1033        err = 0;
1034        goto out_unlock;
1035    }
1036    }
1037out_unlock:
1038    sem_unlock(sma);
1039    wake_up_sem_queue_do(&tasks);
1040
1041out_free:
1042    if(sem_io != fast_sem_io)
1043        ipc_free(sem_io, sizeof(ushort)*nsems);
1044    return err;
1045}
1046
1047static inline unsigned long
1048copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
1049{
1050    switch(version) {
1051    case IPC_64:
1052        if (copy_from_user(out, buf, sizeof(*out)))
1053            return -EFAULT;
1054        return 0;
1055    case IPC_OLD:
1056        {
1057        struct semid_ds tbuf_old;
1058
1059        if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
1060            return -EFAULT;
1061
1062        out->sem_perm.uid = tbuf_old.sem_perm.uid;
1063        out->sem_perm.gid = tbuf_old.sem_perm.gid;
1064        out->sem_perm.mode = tbuf_old.sem_perm.mode;
1065
1066        return 0;
1067        }
1068    default:
1069        return -EINVAL;
1070    }
1071}
1072
1073/*
1074 * This function handles some semctl commands which require the rw_mutex
1075 * to be held in write mode.
1076 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
1077 */
1078static int semctl_down(struct ipc_namespace *ns, int semid,
1079               int cmd, int version, union semun arg)
1080{
1081    struct sem_array *sma;
1082    int err;
1083    struct semid64_ds semid64;
1084    struct kern_ipc_perm *ipcp;
1085
1086    if(cmd == IPC_SET) {
1087        if (copy_semid_from_user(&semid64, arg.buf, version))
1088            return -EFAULT;
1089    }
1090
1091    ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd,
1092                   &semid64.sem_perm, 0);
1093    if (IS_ERR(ipcp))
1094        return PTR_ERR(ipcp);
1095
1096    sma = container_of(ipcp, struct sem_array, sem_perm);
1097
1098    err = security_sem_semctl(sma, cmd);
1099    if (err)
1100        goto out_unlock;
1101
1102    switch(cmd){
1103    case IPC_RMID:
1104        freeary(ns, ipcp);
1105        goto out_up;
1106    case IPC_SET:
1107        ipc_update_perm(&semid64.sem_perm, ipcp);
1108        sma->sem_ctime = get_seconds();
1109        break;
1110    default:
1111        err = -EINVAL;
1112    }
1113
1114out_unlock:
1115    sem_unlock(sma);
1116out_up:
1117    up_write(&sem_ids(ns).rw_mutex);
1118    return err;
1119}
1120
1121SYSCALL_DEFINE(semctl)(int semid, int semnum, int cmd, union semun arg)
1122{
1123    int err = -EINVAL;
1124    int version;
1125    struct ipc_namespace *ns;
1126
1127    if (semid < 0)
1128        return -EINVAL;
1129
1130    version = ipc_parse_version(&cmd);
1131    ns = current->nsproxy->ipc_ns;
1132
1133    switch(cmd) {
1134    case IPC_INFO:
1135    case SEM_INFO:
1136    case IPC_STAT:
1137    case SEM_STAT:
1138        err = semctl_nolock(ns, semid, cmd, version, arg);
1139        return err;
1140    case GETALL:
1141    case GETVAL:
1142    case GETPID:
1143    case GETNCNT:
1144    case GETZCNT:
1145    case SETVAL:
1146    case SETALL:
1147        err = semctl_main(ns,semid,semnum,cmd,version,arg);
1148        return err;
1149    case IPC_RMID:
1150    case IPC_SET:
1151        err = semctl_down(ns, semid, cmd, version, arg);
1152        return err;
1153    default:
1154        return -EINVAL;
1155    }
1156}
1157#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
1158asmlinkage long SyS_semctl(int semid, int semnum, int cmd, union semun arg)
1159{
1160    return SYSC_semctl((int) semid, (int) semnum, (int) cmd, arg);
1161}
1162SYSCALL_ALIAS(sys_semctl, SyS_semctl);
1163#endif
1164
1165/* If the task doesn't already have a undo_list, then allocate one
1166 * here. We guarantee there is only one thread using this undo list,
1167 * and current is THE ONE
1168 *
1169 * If this allocation and assignment succeeds, but later
1170 * portions of this code fail, there is no need to free the sem_undo_list.
1171 * Just let it stay associated with the task, and it'll be freed later
1172 * at exit time.
1173 *
1174 * This can block, so callers must hold no locks.
1175 */
1176static inline int get_undo_list(struct sem_undo_list **undo_listp)
1177{
1178    struct sem_undo_list *undo_list;
1179
1180    undo_list = current->sysvsem.undo_list;
1181    if (!undo_list) {
1182        undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
1183        if (undo_list == NULL)
1184            return -ENOMEM;
1185        spin_lock_init(&undo_list->lock);
1186        atomic_set(&undo_list->refcnt, 1);
1187        INIT_LIST_HEAD(&undo_list->list_proc);
1188
1189        current->sysvsem.undo_list = undo_list;
1190    }
1191    *undo_listp = undo_list;
1192    return 0;
1193}
1194
1195static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
1196{
1197    struct sem_undo *un;
1198
1199    list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
1200        if (un->semid == semid)
1201            return un;
1202    }
1203    return NULL;
1204}
1205
1206static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
1207{
1208    struct sem_undo *un;
1209
1210      assert_spin_locked(&ulp->lock);
1211
1212    un = __lookup_undo(ulp, semid);
1213    if (un) {
1214        list_del_rcu(&un->list_proc);
1215        list_add_rcu(&un->list_proc, &ulp->list_proc);
1216    }
1217    return un;
1218}
1219
1220/**
1221 * find_alloc_undo - Lookup (and if not present create) undo array
1222 * @ns: namespace
1223 * @semid: semaphore array id
1224 *
1225 * The function looks up (and if not present creates) the undo structure.
1226 * The size of the undo structure depends on the size of the semaphore
1227 * array, thus the alloc path is not that straightforward.
1228 * Lifetime-rules: sem_undo is rcu-protected, on success, the function
1229 * performs a rcu_read_lock().
1230 */
1231static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1232{
1233    struct sem_array *sma;
1234    struct sem_undo_list *ulp;
1235    struct sem_undo *un, *new;
1236    int nsems;
1237    int error;
1238
1239    error = get_undo_list(&ulp);
1240    if (error)
1241        return ERR_PTR(error);
1242
1243    rcu_read_lock();
1244    spin_lock(&ulp->lock);
1245    un = lookup_undo(ulp, semid);
1246    spin_unlock(&ulp->lock);
1247    if (likely(un!=NULL))
1248        goto out;
1249    rcu_read_unlock();
1250
1251    /* no undo structure around - allocate one. */
1252    /* step 1: figure out the size of the semaphore array */
1253    sma = sem_lock_check(ns, semid);
1254    if (IS_ERR(sma))
1255        return ERR_CAST(sma);
1256
1257    nsems = sma->sem_nsems;
1258    sem_getref_and_unlock(sma);
1259
1260    /* step 2: allocate new undo structure */
1261    new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1262    if (!new) {
1263        sem_putref(sma);
1264        return ERR_PTR(-ENOMEM);
1265    }
1266
1267    /* step 3: Acquire the lock on semaphore array */
1268    sem_lock_and_putref(sma);
1269    if (sma->sem_perm.deleted) {
1270        sem_unlock(sma);
1271        kfree(new);
1272        un = ERR_PTR(-EIDRM);
1273        goto out;
1274    }
1275    spin_lock(&ulp->lock);
1276
1277    /*
1278     * step 4: check for races: did someone else allocate the undo struct?
1279     */
1280    un = lookup_undo(ulp, semid);
1281    if (un) {
1282        kfree(new);
1283        goto success;
1284    }
1285    /* step 5: initialize & link new undo structure */
1286    new->semadj = (short *) &new[1];
1287    new->ulp = ulp;
1288    new->semid = semid;
1289    assert_spin_locked(&ulp->lock);
1290    list_add_rcu(&new->list_proc, &ulp->list_proc);
1291    assert_spin_locked(&sma->sem_perm.lock);
1292    list_add(&new->list_id, &sma->list_id);
1293    un = new;
1294
1295success:
1296    spin_unlock(&ulp->lock);
1297    rcu_read_lock();
1298    sem_unlock(sma);
1299out:
1300    return un;
1301}
1302
1303
1304/**
1305 * get_queue_result - Retrieve the result code from sem_queue
1306 * @q: Pointer to queue structure
1307 *
1308 * Retrieve the return code from the pending queue. If IN_WAKEUP is found in
1309 * q->status, then we must loop until the value is replaced with the final
1310 * value: This may happen if a task is woken up by an unrelated event (e.g.
1311 * signal) and in parallel the task is woken up by another task because it got
1312 * the requested semaphores.
1313 *
1314 * The function can be called with or without holding the semaphore spinlock.
1315 */
1316static int get_queue_result(struct sem_queue *q)
1317{
1318    int error;
1319
1320    error = q->status;
1321    while (unlikely(error == IN_WAKEUP)) {
1322        cpu_relax();
1323        error = q->status;
1324    }
1325
1326    return error;
1327}
1328
1329
/*
 * semtimedop - perform a set of semaphore operations, optionally bounded by
 * a timeout.
 * @semid:   id of the semaphore array
 * @tsops:   user-space array of operations
 * @nsops:   number of entries in @tsops (1 .. ns->sc_semopm)
 * @timeout: user-space timespec limiting the sleep, or NULL to sleep
 *           without a limit
 *
 * The operations are copied into a stack buffer (or a kmalloc'ed one when
 * nsops > SEMOPM_FAST), validated, and attempted via try_atomic_semop().
 * If that reports that the caller has to wait, the request is put on the
 * pending queue(s) of the array and the task sleeps interruptibly until it
 * is completed by another task, a signal arrives, or the timeout expires.
 *
 * Error codes produced directly here: -EINVAL, -E2BIG, -ENOMEM, -EFAULT,
 * -EIDRM, -EFBIG, -EACCES, -EAGAIN (timeout) and -EINTR (signal); other
 * values come from the helpers or from queue.status.
 */
1330SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1331        unsigned, nsops, const struct timespec __user *, timeout)
1332{
1333    int error = -EINVAL;
1334    struct sem_array *sma;
1335    struct sembuf fast_sops[SEMOPM_FAST];
1336    struct sembuf* sops = fast_sops, *sop;
1337    struct sem_undo *un;
1338    int undos = 0, alter = 0, max;
1339    struct sem_queue queue;
1340    unsigned long jiffies_left = 0;
1341    struct ipc_namespace *ns;
1342    struct list_head tasks;
1343
1344    ns = current->nsproxy->ipc_ns;
1345
1346    if (nsops < 1 || semid < 0)
1347        return -EINVAL;
1348    if (nsops > ns->sc_semopm)
1349        return -E2BIG;
    /* Small requests use the on-stack buffer, larger ones the heap. */
1350    if(nsops > SEMOPM_FAST) {
1351        sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
1352        if(sops==NULL)
1353            return -ENOMEM;
1354    }
1355    if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) {
1356        error=-EFAULT;
1357        goto out_free;
1358    }
1359    if (timeout) {
1360        struct timespec _timeout;
1361        if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
1362            error = -EFAULT;
1363            goto out_free;
1364        }
1365        if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
1366            _timeout.tv_nsec >= 1000000000L) {
1367            error = -EINVAL;
1368            goto out_free;
1369        }
1370        jiffies_left = timespec_to_jiffies(&_timeout);
1371    }
    /*
     * One pass over the operations: remember the highest semaphore index,
     * whether any operation asks for SEM_UNDO, and whether any operation
     * has a non-zero sem_op (i.e. the request alters the array).
     */
1372    max = 0;
1373    for (sop = sops; sop < sops + nsops; sop++) {
1374        if (sop->sem_num >= max)
1375            max = sop->sem_num;
1376        if (sop->sem_flg & SEM_UNDO)
1377            undos = 1;
1378        if (sop->sem_op != 0)
1379            alter = 1;
1380    }
1381
    /* On success find_alloc_undo() returns with rcu_read_lock() held. */
1382    if (undos) {
1383        un = find_alloc_undo(ns, semid);
1384        if (IS_ERR(un)) {
1385            error = PTR_ERR(un);
1386            goto out_free;
1387        }
1388    } else
1389        un = NULL;
1390
1391    INIT_LIST_HEAD(&tasks);
1392
1393    sma = sem_lock_check(ns, semid);
1394    if (IS_ERR(sma)) {
1395        if (un)
1396            rcu_read_unlock();
1397        error = PTR_ERR(sma);
1398        goto out_free;
1399    }
1400
1401    /*
1402     * semid identifiers are not unique - find_alloc_undo may have
1403     * allocated an undo structure, it was invalidated by an RMID
1404     * and now a new array has received the same id. Check and fail.
1405     * This case can be detected checking un->semid. The existence of
1406     * "un" itself is guaranteed by rcu.
1407     */
1408    error = -EIDRM;
1409    if (un) {
1410        if (un->semid == -1) {
1411            rcu_read_unlock();
1412            goto out_unlock_free;
1413        } else {
1414            /*
1415             * rcu lock can be released, "un" cannot disappear:
1416             * - sem_lock is acquired, thus IPC_RMID is
1417             * impossible.
1418             * - exit_sem is impossible, it always operates on
1419             * current (or a dead task).
1420             */
1421
1422            rcu_read_unlock();
1423        }
1424    }
1425
    /* Every referenced semaphore index must exist in this array. */
1426    error = -EFBIG;
1427    if (max >= sma->sem_nsems)
1428        goto out_unlock_free;
1429
    /* Altering requests need write permission, pure waits read permission. */
1430    error = -EACCES;
1431    if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
1432        goto out_unlock_free;
1433
1434    error = security_sem_semop(sma, sops, nsops, alter);
1435    if (error)
1436        goto out_unlock_free;
1437
    /*
     * A result <= 0 ends the call here (0 or a negative error); only a
     * positive result falls through to the sleeping path below.
     */
1438    error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
1439    if (error <= 0) {
1440        if (alter && error == 0)
1441            do_smart_update(sma, sops, nsops, 1, &tasks);
1442
1443        goto out_unlock_free;
1444    }
1445
1446    /* We need to sleep on this operation, so we put the current
1447     * task into the pending queue and go to sleep.
1448     */
1449        
1450    queue.sops = sops;
1451    queue.nsops = nsops;
1452    queue.undo = un;
1453    queue.pid = task_tgid_vnr(current);
1454    queue.alter = alter;
    /* Altering requests go to the tail, non-altering ones to the head. */
1455    if (alter)
1456        list_add_tail(&queue.list, &sma->sem_pending);
1457    else
1458        list_add(&queue.list, &sma->sem_pending);
1459
    /*
     * Single-operation requests are additionally linked on the pending
     * list of their semaphore; multi-operation requests are counted in
     * complex_count instead.
     */
1460    if (nsops == 1) {
1461        struct sem *curr;
1462        curr = &sma->sem_base[sops->sem_num];
1463
1464        if (alter)
1465            list_add_tail(&queue.simple_list, &curr->sem_pending);
1466        else
1467            list_add(&queue.simple_list, &curr->sem_pending);
1468    } else {
1469        INIT_LIST_HEAD(&queue.simple_list);
1470        sma->complex_count++;
1471    }
1472
    /* -EINTR is the status that remains if nobody completes the request. */
1473    queue.status = -EINTR;
1474    queue.sleeper = current;
1475
1476sleep_again:
    /* Mark the task sleeping before the array lock is dropped. */
1477    current->state = TASK_INTERRUPTIBLE;
1478    sem_unlock(sma);
1479
1480    if (timeout)
1481        jiffies_left = schedule_timeout(jiffies_left);
1482    else
1483        schedule();
1484
1485    error = get_queue_result(&queue);
1486
1487    if (error != -EINTR) {
1488        /* fast path: update_queue already obtained all requested
1489         * resources.
1490         * Perform a smp_mb(): User space could assume that semop()
1491         * is a memory barrier: Without the mb(), the cpu could
1492         * speculatively read in user space stale data that was
1493         * overwritten by the previous owner of the semaphore.
1494         */
1495        smp_mb();
1496
1497        goto out_free;
1498    }
1499
1500    sma = sem_lock(ns, semid);
1501
1502    /*
1503     * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
1504     */
1505    error = get_queue_result(&queue);
1506
1507    /*
1508     * Array removed? If yes, leave without sem_unlock().
1509     */
1510    if (IS_ERR(sma)) {
1511        goto out_free;
1512    }
1513
1514
1515    /*
1516     * If queue.status != -EINTR we are woken up by another process.
1517     * Leave without unlink_queue(), but with sem_unlock().
1518     */
1519
1520    if (error != -EINTR) {
1521        goto out_unlock_free;
1522    }
1523
1524    /*
1525     * If an interrupt occurred we have to clean up the queue
1526     */
1527    if (timeout && jiffies_left == 0)
1528        error = -EAGAIN;
1529
1530    /*
1531     * If the wakeup was spurious, just retry
1532     */
1533    if (error == -EINTR && !signal_pending(current))
1534        goto sleep_again;
1535
1536    unlink_queue(sma, &queue);
1537
1538out_unlock_free:
1539    sem_unlock(sma);
1540
1541    wake_up_sem_queue_do(&tasks);
1542out_free:
    /* Only a heap-allocated operation buffer has to be released. */
1543    if(sops != fast_sops)
1544        kfree(sops);
1545    return error;
1546}
1547
1548SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
1549        unsigned, nsops)
1550{
1551    return sys_semtimedop(semid, tsops, nsops, NULL);
1552}
1553
1554/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
1555 * parent and child tasks.
1556 */
1557
1558int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
1559{
1560    struct sem_undo_list *undo_list;
1561    int error;
1562
1563    if (clone_flags & CLONE_SYSVSEM) {
1564        error = get_undo_list(&undo_list);
1565        if (error)
1566            return error;
1567        atomic_inc(&undo_list->refcnt);
1568        tsk->sysvsem.undo_list = undo_list;
1569    } else
1570        tsk->sysvsem.undo_list = NULL;
1571
1572    return 0;
1573}
1574
1575/*
1576 * add semadj values to semaphores, free undo structures.
1577 * undo structures are not freed when semaphore arrays are destroyed
1578 * so some of them may be out of date.
1579 * IMPLEMENTATION NOTE: There is some confusion over whether the
1580 * set of adjustments that needs to be done should be done in an atomic
1581 * manner or not. That is, if we are attempting to decrement the semval
1582 * should we queue up and wait until we can do so legally?
1583 * The original implementation attempted to do this (queue and wait).
1584 * The current implementation does not do so. The POSIX standard
1585 * and SVID should be consulted to determine what behavior is mandated.
1586 */
1587void exit_sem(struct task_struct *tsk)
1588{
1589    struct sem_undo_list *ulp;
1590
1591    ulp = tsk->sysvsem.undo_list;
1592    if (!ulp)
1593        return;
1594    tsk->sysvsem.undo_list = NULL;
1595
1596    if (!atomic_dec_and_test(&ulp->refcnt))
1597        return;
1598
1599    for (;;) {
1600        struct sem_array *sma;
1601        struct sem_undo *un;
1602        struct list_head tasks;
1603        int semid;
1604        int i;
1605
1606        rcu_read_lock();
1607        un = list_entry_rcu(ulp->list_proc.next,
1608                    struct sem_undo, list_proc);
1609        if (&un->list_proc == &ulp->list_proc)
1610            semid = -1;
1611         else
1612            semid = un->semid;
1613        rcu_read_unlock();
1614
1615        if (semid == -1)
1616            break;
1617
1618        sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
1619
1620        /* exit_sem raced with IPC_RMID, nothing to do */
1621        if (IS_ERR(sma))
1622            continue;
1623
1624        un = __lookup_undo(ulp, semid);
1625        if (un == NULL) {
1626            /* exit_sem raced with IPC_RMID+semget() that created
1627             * exactly the same semid. Nothing to do.
1628             */
1629            sem_unlock(sma);
1630            continue;
1631        }
1632
1633        /* remove un from the linked lists */
1634        assert_spin_locked(&sma->sem_perm.lock);
1635        list_del(&un->list_id);
1636
1637        spin_lock(&ulp->lock);
1638        list_del_rcu(&un->list_proc);
1639        spin_unlock(&ulp->lock);
1640
1641        /* perform adjustments registered in un */
1642        for (i = 0; i < sma->sem_nsems; i++) {
1643            struct sem * semaphore = &sma->sem_base[i];
1644            if (un->semadj[i]) {
1645                semaphore->semval += un->semadj[i];
1646                /*
1647                 * Range checks of the new semaphore value,
1648                 * not defined by sus:
1649                 * - Some unices ignore the undo entirely
1650                 * (e.g. HP UX 11i 11.22, Tru64 V5.1)
1651                 * - some cap the value (e.g. FreeBSD caps
1652                 * at 0, but doesn't enforce SEMVMX)
1653                 *
1654                 * Linux caps the semaphore value, both at 0
1655                 * and at SEMVMX.
1656                 *
1657                 * Manfred <manfred@colorfullife.com>
1658                 */
1659                if (semaphore->semval < 0)
1660                    semaphore->semval = 0;
1661                if (semaphore->semval > SEMVMX)
1662                    semaphore->semval = SEMVMX;
1663                semaphore->sempid = task_tgid_vnr(current);
1664            }
1665        }
1666        /* maybe some queued-up processes were waiting for this */
1667        INIT_LIST_HEAD(&tasks);
1668        do_smart_update(sma, NULL, 0, 1, &tasks);
1669        sem_unlock(sma);
1670        wake_up_sem_queue_do(&tasks);
1671
1672        kfree_rcu(un, rcu);
1673    }
1674    kfree(ulp);
1675}
1676
#ifdef CONFIG_PROC_FS
/*
 * sysvipc_sem_proc_show - print one semaphore array as a line of text.
 * @s:  seq_file to print into
 * @it: the struct sem_array to describe
 *
 * Columns, in order: key, id, mode (octal), number of semaphores, uid,
 * gid, cuid, cgid, sem_otime, sem_ctime.
 *
 * Returns whatever seq_printf() returns.
 */
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
    struct sem_array *sma = it;
    struct kern_ipc_perm *perm = &sma->sem_perm;

    return seq_printf(s,
              "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n",
              perm->key,
              perm->id,
              perm->mode,
              sma->sem_nsems,
              perm->uid,
              perm->gid,
              perm->cuid,
              perm->cgid,
              sma->sem_otime,
              sma->sem_ctime);
}
#endif
1696

Archive Download this file



interactive