Root/
1 | /* |
2 | * fs/timerfd.c |
3 | * |
4 | * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> |
5 | * |
6 | * |
7 | * Thanks to Thomas Gleixner for code reviews and useful comments. |
8 | * |
9 | */ |
10 | |
11 | #include <linux/alarmtimer.h> |
12 | #include <linux/file.h> |
13 | #include <linux/poll.h> |
14 | #include <linux/init.h> |
15 | #include <linux/fs.h> |
16 | #include <linux/sched.h> |
17 | #include <linux/kernel.h> |
18 | #include <linux/slab.h> |
19 | #include <linux/list.h> |
20 | #include <linux/spinlock.h> |
21 | #include <linux/time.h> |
22 | #include <linux/hrtimer.h> |
23 | #include <linux/anon_inodes.h> |
24 | #include <linux/timerfd.h> |
25 | #include <linux/syscalls.h> |
26 | #include <linux/compat.h> |
27 | #include <linux/rcupdate.h> |
28 | |
29 | struct timerfd_ctx { |
30 | union { |
31 | struct hrtimer tmr; |
32 | struct alarm alarm; |
33 | } t; |
34 | ktime_t tintv; |
35 | ktime_t moffs; |
36 | wait_queue_head_t wqh; |
37 | u64 ticks; |
38 | int clockid; |
39 | short unsigned expired; |
40 | short unsigned settime_flags; /* to show in fdinfo */ |
41 | struct rcu_head rcu; |
42 | struct list_head clist; |
43 | bool might_cancel; |
44 | }; |
45 | |
/* Taken around every list_add_rcu()/list_del_rcu() on cancel_list. */
static DEFINE_SPINLOCK(cancel_lock);
/* Contexts to notify from timerfd_clock_was_set(); traversed under RCU. */
static LIST_HEAD(cancel_list);
48 | |
49 | static inline bool isalarm(struct timerfd_ctx *ctx) |
50 | { |
51 | return ctx->clockid == CLOCK_REALTIME_ALARM || |
52 | ctx->clockid == CLOCK_BOOTTIME_ALARM; |
53 | } |
54 | |
55 | /* |
56 | * This gets called when the timer event triggers. We set the "expired" |
57 | * flag, but we do not re-arm the timer (in case it's necessary, |
58 | * tintv.tv64 != 0) until the timer is accessed. |
59 | */ |
60 | static void timerfd_triggered(struct timerfd_ctx *ctx) |
61 | { |
62 | unsigned long flags; |
63 | |
64 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
65 | ctx->expired = 1; |
66 | ctx->ticks++; |
67 | wake_up_locked(&ctx->wqh); |
68 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
69 | } |
70 | |
71 | static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) |
72 | { |
73 | struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, |
74 | t.tmr); |
75 | timerfd_triggered(ctx); |
76 | return HRTIMER_NORESTART; |
77 | } |
78 | |
79 | static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm, |
80 | ktime_t now) |
81 | { |
82 | struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx, |
83 | t.alarm); |
84 | timerfd_triggered(ctx); |
85 | return ALARMTIMER_NORESTART; |
86 | } |
87 | |
88 | /* |
89 | * Called when the clock was set to cancel the timers in the cancel |
90 | * list. This will wake up processes waiting on these timers. The |
91 | * wake-up requires ctx->ticks to be non zero, therefore we increment |
92 | * it before calling wake_up_locked(). |
93 | */ |
94 | void timerfd_clock_was_set(void) |
95 | { |
96 | ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); |
97 | struct timerfd_ctx *ctx; |
98 | unsigned long flags; |
99 | |
100 | rcu_read_lock(); |
101 | list_for_each_entry_rcu(ctx, &cancel_list, clist) { |
102 | if (!ctx->might_cancel) |
103 | continue; |
104 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
105 | if (ctx->moffs.tv64 != moffs.tv64) { |
106 | ctx->moffs.tv64 = KTIME_MAX; |
107 | ctx->ticks++; |
108 | wake_up_locked(&ctx->wqh); |
109 | } |
110 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
111 | } |
112 | rcu_read_unlock(); |
113 | } |
114 | |
115 | static void timerfd_remove_cancel(struct timerfd_ctx *ctx) |
116 | { |
117 | if (ctx->might_cancel) { |
118 | ctx->might_cancel = false; |
119 | spin_lock(&cancel_lock); |
120 | list_del_rcu(&ctx->clist); |
121 | spin_unlock(&cancel_lock); |
122 | } |
123 | } |
124 | |
125 | static bool timerfd_canceled(struct timerfd_ctx *ctx) |
126 | { |
127 | if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) |
128 | return false; |
129 | ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); |
130 | return true; |
131 | } |
132 | |
133 | static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) |
134 | { |
135 | if ((ctx->clockid == CLOCK_REALTIME || |
136 | ctx->clockid == CLOCK_REALTIME_ALARM) && |
137 | (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { |
138 | if (!ctx->might_cancel) { |
139 | ctx->might_cancel = true; |
140 | spin_lock(&cancel_lock); |
141 | list_add_rcu(&ctx->clist, &cancel_list); |
142 | spin_unlock(&cancel_lock); |
143 | } |
144 | } else if (ctx->might_cancel) { |
145 | timerfd_remove_cancel(ctx); |
146 | } |
147 | } |
148 | |
149 | static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) |
150 | { |
151 | ktime_t remaining; |
152 | |
153 | if (isalarm(ctx)) |
154 | remaining = alarm_expires_remaining(&ctx->t.alarm); |
155 | else |
156 | remaining = hrtimer_expires_remaining(&ctx->t.tmr); |
157 | |
158 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
159 | } |
160 | |
161 | static int timerfd_setup(struct timerfd_ctx *ctx, int flags, |
162 | const struct itimerspec *ktmr) |
163 | { |
164 | enum hrtimer_mode htmode; |
165 | ktime_t texp; |
166 | int clockid = ctx->clockid; |
167 | |
168 | htmode = (flags & TFD_TIMER_ABSTIME) ? |
169 | HRTIMER_MODE_ABS: HRTIMER_MODE_REL; |
170 | |
171 | texp = timespec_to_ktime(ktmr->it_value); |
172 | ctx->expired = 0; |
173 | ctx->ticks = 0; |
174 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); |
175 | |
176 | if (isalarm(ctx)) { |
177 | alarm_init(&ctx->t.alarm, |
178 | ctx->clockid == CLOCK_REALTIME_ALARM ? |
179 | ALARM_REALTIME : ALARM_BOOTTIME, |
180 | timerfd_alarmproc); |
181 | } else { |
182 | hrtimer_init(&ctx->t.tmr, clockid, htmode); |
183 | hrtimer_set_expires(&ctx->t.tmr, texp); |
184 | ctx->t.tmr.function = timerfd_tmrproc; |
185 | } |
186 | |
187 | if (texp.tv64 != 0) { |
188 | if (isalarm(ctx)) { |
189 | if (flags & TFD_TIMER_ABSTIME) |
190 | alarm_start(&ctx->t.alarm, texp); |
191 | else |
192 | alarm_start_relative(&ctx->t.alarm, texp); |
193 | } else { |
194 | hrtimer_start(&ctx->t.tmr, texp, htmode); |
195 | } |
196 | |
197 | if (timerfd_canceled(ctx)) |
198 | return -ECANCELED; |
199 | } |
200 | |
201 | ctx->settime_flags = flags & TFD_SETTIME_FLAGS; |
202 | return 0; |
203 | } |
204 | |
205 | static int timerfd_release(struct inode *inode, struct file *file) |
206 | { |
207 | struct timerfd_ctx *ctx = file->private_data; |
208 | |
209 | timerfd_remove_cancel(ctx); |
210 | |
211 | if (isalarm(ctx)) |
212 | alarm_cancel(&ctx->t.alarm); |
213 | else |
214 | hrtimer_cancel(&ctx->t.tmr); |
215 | kfree_rcu(ctx, rcu); |
216 | return 0; |
217 | } |
218 | |
219 | static unsigned int timerfd_poll(struct file *file, poll_table *wait) |
220 | { |
221 | struct timerfd_ctx *ctx = file->private_data; |
222 | unsigned int events = 0; |
223 | unsigned long flags; |
224 | |
225 | poll_wait(file, &ctx->wqh, wait); |
226 | |
227 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
228 | if (ctx->ticks) |
229 | events |= POLLIN; |
230 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
231 | |
232 | return events; |
233 | } |
234 | |
235 | static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, |
236 | loff_t *ppos) |
237 | { |
238 | struct timerfd_ctx *ctx = file->private_data; |
239 | ssize_t res; |
240 | u64 ticks = 0; |
241 | |
242 | if (count < sizeof(ticks)) |
243 | return -EINVAL; |
244 | spin_lock_irq(&ctx->wqh.lock); |
245 | if (file->f_flags & O_NONBLOCK) |
246 | res = -EAGAIN; |
247 | else |
248 | res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); |
249 | |
250 | /* |
251 | * If clock has changed, we do not care about the |
252 | * ticks and we do not rearm the timer. Userspace must |
253 | * reevaluate anyway. |
254 | */ |
255 | if (timerfd_canceled(ctx)) { |
256 | ctx->ticks = 0; |
257 | ctx->expired = 0; |
258 | res = -ECANCELED; |
259 | } |
260 | |
261 | if (ctx->ticks) { |
262 | ticks = ctx->ticks; |
263 | |
264 | if (ctx->expired && ctx->tintv.tv64) { |
265 | /* |
266 | * If tintv.tv64 != 0, this is a periodic timer that |
267 | * needs to be re-armed. We avoid doing it in the timer |
268 | * callback to avoid DoS attacks specifying a very |
269 | * short timer period. |
270 | */ |
271 | if (isalarm(ctx)) { |
272 | ticks += alarm_forward_now( |
273 | &ctx->t.alarm, ctx->tintv) - 1; |
274 | alarm_restart(&ctx->t.alarm); |
275 | } else { |
276 | ticks += hrtimer_forward_now(&ctx->t.tmr, |
277 | ctx->tintv) - 1; |
278 | hrtimer_restart(&ctx->t.tmr); |
279 | } |
280 | } |
281 | ctx->expired = 0; |
282 | ctx->ticks = 0; |
283 | } |
284 | spin_unlock_irq(&ctx->wqh.lock); |
285 | if (ticks) |
286 | res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks); |
287 | return res; |
288 | } |
289 | |
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo handler: print clock id, pending ticks, the flags of the
 * last successful settime and the remaining time / interval of the timer.
 * The time values are sampled under wqh.lock; the result of seq_printf()
 * is returned.
 */
static int timerfd_show(struct seq_file *m, struct file *file)
{
	struct timerfd_ctx *ctx = file->private_data;
	struct itimerspec cur;

	spin_lock_irq(&ctx->wqh.lock);
	cur.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
	cur.it_interval = ktime_to_timespec(ctx->tintv);
	spin_unlock_irq(&ctx->wqh.lock);

	return seq_printf(m,
			  "clockid: %d\n"
			  "ticks: %llu\n"
			  "settime flags: 0%o\n"
			  "it_value: (%llu, %llu)\n"
			  "it_interval: (%llu, %llu)\n",
			  ctx->clockid,
			  (unsigned long long)ctx->ticks,
			  ctx->settime_flags,
			  (unsigned long long)cur.it_value.tv_sec,
			  (unsigned long long)cur.it_value.tv_nsec,
			  (unsigned long long)cur.it_interval.tv_sec,
			  (unsigned long long)cur.it_interval.tv_nsec);
}
#else
/* No procfs: leave the show_fdinfo hook unset. */
#define timerfd_show NULL
#endif
317 | |
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * ->unlocked_ioctl handler. The only supported command is
 * TFD_IOC_SET_TICKS, which overwrites the pending tick count with the u64
 * read from @arg and wakes any waiter.
 *
 * Returns 0 on success, -ENOTTY for an unknown command, -EFAULT if the
 * value cannot be copied in, -EINVAL for a zero tick count and -ECANCELED
 * if a cancellation is pending on the context.
 */
static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct timerfd_ctx *ctx = file->private_data;
	int ret = 0;
	u64 ticks;

	if (cmd != TFD_IOC_SET_TICKS)
		return -ENOTTY;

	if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
		return -EFAULT;
	if (!ticks)
		return -EINVAL;

	spin_lock_irq(&ctx->wqh.lock);
	if (timerfd_canceled(ctx)) {
		ret = -ECANCELED;
	} else {
		ctx->ticks = ticks;
		wake_up_locked(&ctx->wqh);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return ret;
}
#else
/* Checkpoint/restore support disabled: no ioctl handler. */
#define timerfd_ioctl NULL
#endif
352 | |
353 | static const struct file_operations timerfd_fops = { |
354 | .release = timerfd_release, |
355 | .poll = timerfd_poll, |
356 | .read = timerfd_read, |
357 | .llseek = noop_llseek, |
358 | .show_fdinfo = timerfd_show, |
359 | .unlocked_ioctl = timerfd_ioctl, |
360 | }; |
361 | |
362 | static int timerfd_fget(int fd, struct fd *p) |
363 | { |
364 | struct fd f = fdget(fd); |
365 | if (!f.file) |
366 | return -EBADF; |
367 | if (f.file->f_op != &timerfd_fops) { |
368 | fdput(f); |
369 | return -EINVAL; |
370 | } |
371 | *p = f; |
372 | return 0; |
373 | } |
374 | |
375 | SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) |
376 | { |
377 | int ufd; |
378 | struct timerfd_ctx *ctx; |
379 | |
380 | /* Check the TFD_* constants for consistency. */ |
381 | BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); |
382 | BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); |
383 | |
384 | if ((flags & ~TFD_CREATE_FLAGS) || |
385 | (clockid != CLOCK_MONOTONIC && |
386 | clockid != CLOCK_REALTIME && |
387 | clockid != CLOCK_REALTIME_ALARM && |
388 | clockid != CLOCK_BOOTTIME && |
389 | clockid != CLOCK_BOOTTIME_ALARM)) |
390 | return -EINVAL; |
391 | |
392 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); |
393 | if (!ctx) |
394 | return -ENOMEM; |
395 | |
396 | init_waitqueue_head(&ctx->wqh); |
397 | ctx->clockid = clockid; |
398 | |
399 | if (isalarm(ctx)) |
400 | alarm_init(&ctx->t.alarm, |
401 | ctx->clockid == CLOCK_REALTIME_ALARM ? |
402 | ALARM_REALTIME : ALARM_BOOTTIME, |
403 | timerfd_alarmproc); |
404 | else |
405 | hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); |
406 | |
407 | ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); |
408 | |
409 | ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, |
410 | O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); |
411 | if (ufd < 0) |
412 | kfree(ctx); |
413 | |
414 | return ufd; |
415 | } |
416 | |
417 | static int do_timerfd_settime(int ufd, int flags, |
418 | const struct itimerspec *new, |
419 | struct itimerspec *old) |
420 | { |
421 | struct fd f; |
422 | struct timerfd_ctx *ctx; |
423 | int ret; |
424 | |
425 | if ((flags & ~TFD_SETTIME_FLAGS) || |
426 | !timespec_valid(&new->it_value) || |
427 | !timespec_valid(&new->it_interval)) |
428 | return -EINVAL; |
429 | |
430 | ret = timerfd_fget(ufd, &f); |
431 | if (ret) |
432 | return ret; |
433 | ctx = f.file->private_data; |
434 | |
435 | timerfd_setup_cancel(ctx, flags); |
436 | |
437 | /* |
438 | * We need to stop the existing timer before reprogramming |
439 | * it to the new values. |
440 | */ |
441 | for (;;) { |
442 | spin_lock_irq(&ctx->wqh.lock); |
443 | |
444 | if (isalarm(ctx)) { |
445 | if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) |
446 | break; |
447 | } else { |
448 | if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) |
449 | break; |
450 | } |
451 | spin_unlock_irq(&ctx->wqh.lock); |
452 | cpu_relax(); |
453 | } |
454 | |
455 | /* |
456 | * If the timer is expired and it's periodic, we need to advance it |
457 | * because the caller may want to know the previous expiration time. |
458 | * We do not update "ticks" and "expired" since the timer will be |
459 | * re-programmed again in the following timerfd_setup() call. |
460 | */ |
461 | if (ctx->expired && ctx->tintv.tv64) { |
462 | if (isalarm(ctx)) |
463 | alarm_forward_now(&ctx->t.alarm, ctx->tintv); |
464 | else |
465 | hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); |
466 | } |
467 | |
468 | old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
469 | old->it_interval = ktime_to_timespec(ctx->tintv); |
470 | |
471 | /* |
472 | * Re-program the timer to the new value ... |
473 | */ |
474 | ret = timerfd_setup(ctx, flags, new); |
475 | |
476 | spin_unlock_irq(&ctx->wqh.lock); |
477 | fdput(f); |
478 | return ret; |
479 | } |
480 | |
481 | static int do_timerfd_gettime(int ufd, struct itimerspec *t) |
482 | { |
483 | struct fd f; |
484 | struct timerfd_ctx *ctx; |
485 | int ret = timerfd_fget(ufd, &f); |
486 | if (ret) |
487 | return ret; |
488 | ctx = f.file->private_data; |
489 | |
490 | spin_lock_irq(&ctx->wqh.lock); |
491 | if (ctx->expired && ctx->tintv.tv64) { |
492 | ctx->expired = 0; |
493 | |
494 | if (isalarm(ctx)) { |
495 | ctx->ticks += |
496 | alarm_forward_now( |
497 | &ctx->t.alarm, ctx->tintv) - 1; |
498 | alarm_restart(&ctx->t.alarm); |
499 | } else { |
500 | ctx->ticks += |
501 | hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) |
502 | - 1; |
503 | hrtimer_restart(&ctx->t.tmr); |
504 | } |
505 | } |
506 | t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
507 | t->it_interval = ktime_to_timespec(ctx->tintv); |
508 | spin_unlock_irq(&ctx->wqh.lock); |
509 | fdput(f); |
510 | return 0; |
511 | } |
512 | |
513 | SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, |
514 | const struct itimerspec __user *, utmr, |
515 | struct itimerspec __user *, otmr) |
516 | { |
517 | struct itimerspec new, old; |
518 | int ret; |
519 | |
520 | if (copy_from_user(&new, utmr, sizeof(new))) |
521 | return -EFAULT; |
522 | ret = do_timerfd_settime(ufd, flags, &new, &old); |
523 | if (ret) |
524 | return ret; |
525 | if (otmr && copy_to_user(otmr, &old, sizeof(old))) |
526 | return -EFAULT; |
527 | |
528 | return ret; |
529 | } |
530 | |
531 | SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) |
532 | { |
533 | struct itimerspec kotmr; |
534 | int ret = do_timerfd_gettime(ufd, &kotmr); |
535 | if (ret) |
536 | return ret; |
537 | return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; |
538 | } |
539 | |
540 | #ifdef CONFIG_COMPAT |
541 | COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, |
542 | const struct compat_itimerspec __user *, utmr, |
543 | struct compat_itimerspec __user *, otmr) |
544 | { |
545 | struct itimerspec new, old; |
546 | int ret; |
547 | |
548 | if (get_compat_itimerspec(&new, utmr)) |
549 | return -EFAULT; |
550 | ret = do_timerfd_settime(ufd, flags, &new, &old); |
551 | if (ret) |
552 | return ret; |
553 | if (otmr && put_compat_itimerspec(otmr, &old)) |
554 | return -EFAULT; |
555 | return ret; |
556 | } |
557 | |
558 | COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd, |
559 | struct compat_itimerspec __user *, otmr) |
560 | { |
561 | struct itimerspec kotmr; |
562 | int ret = do_timerfd_gettime(ufd, &kotmr); |
563 | if (ret) |
564 | return ret; |
565 | return put_compat_itimerspec(otmr, &kotmr) ? -EFAULT: 0; |
566 | } |
567 | #endif |
568 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9