Root/
1 | /* |
2 | * taskstats.c - Export per-task statistics to userland |
3 | * |
4 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 |
5 | * (C) Balbir Singh, IBM Corp. 2006 |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or |
10 | * (at your option) any later version. |
11 | * |
12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | * GNU General Public License for more details. |
16 | * |
17 | */ |
18 | |
19 | #include <linux/kernel.h> |
20 | #include <linux/taskstats_kern.h> |
21 | #include <linux/tsacct_kern.h> |
22 | #include <linux/delayacct.h> |
23 | #include <linux/cpumask.h> |
24 | #include <linux/percpu.h> |
25 | #include <linux/slab.h> |
26 | #include <linux/cgroupstats.h> |
27 | #include <linux/cgroup.h> |
28 | #include <linux/fs.h> |
29 | #include <linux/file.h> |
30 | #include <net/genetlink.h> |
31 | #include <asm/atomic.h> |
32 | |
/*
 * Upper bound on the length of the cpulist payload accepted in the
 * TASKSTATS_CMD_ATTR_REGISTER_CPUMASK / DEREGISTER_CPUMASK attributes;
 * parse() rejects anything longer with -E2BIG.
 */
#define TASKSTATS_CPUMASK_MAXLEN	(100+6*NR_CPUS)

/* Per-cpu sequence number used for messages that are not replies to a request */
static DEFINE_PER_CPU(__u32, taskstats_seqnum);

/* Set to 1 by taskstats_init() once the family and both ops are registered */
static int family_registered;

/* Slab cache from which the per-thread-group struct taskstats is allocated */
struct kmem_cache *taskstats_cache;
42 | |
43 | static struct genl_family family = { |
44 | .id = GENL_ID_GENERATE, |
45 | .name = TASKSTATS_GENL_NAME, |
46 | .version = TASKSTATS_GENL_VERSION, |
47 | .maxattr = TASKSTATS_CMD_ATTR_MAX, |
48 | }; |
49 | |
50 | static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { |
51 | [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, |
52 | [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, |
53 | [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, |
54 | [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; |
55 | |
56 | static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = { |
57 | [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, |
58 | }; |
59 | |
60 | struct listener { |
61 | struct list_head list; |
62 | pid_t pid; |
63 | char valid; |
64 | }; |
65 | |
66 | struct listener_list { |
67 | struct rw_semaphore sem; |
68 | struct list_head list; |
69 | }; |
70 | static DEFINE_PER_CPU(struct listener_list, listener_array); |
71 | |
/*
 * Operation selector passed as @isadd to add_del_listener().
 * CPU_DONT_CARE is not referenced anywhere in this file.
 */
enum actions {
	REGISTER,
	DEREGISTER,
	CPU_DONT_CARE
};
77 | |
78 | static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, |
79 | size_t size) |
80 | { |
81 | struct sk_buff *skb; |
82 | void *reply; |
83 | |
84 | /* |
85 | * If new attributes are added, please revisit this allocation |
86 | */ |
87 | skb = genlmsg_new(size, GFP_KERNEL); |
88 | if (!skb) |
89 | return -ENOMEM; |
90 | |
91 | if (!info) { |
92 | int seq = get_cpu_var(taskstats_seqnum)++; |
93 | put_cpu_var(taskstats_seqnum); |
94 | |
95 | reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); |
96 | } else |
97 | reply = genlmsg_put_reply(skb, info, &family, 0, cmd); |
98 | if (reply == NULL) { |
99 | nlmsg_free(skb); |
100 | return -EINVAL; |
101 | } |
102 | |
103 | *skbp = skb; |
104 | return 0; |
105 | } |
106 | |
/*
 * Finalize the message in @skb and send it back to the requester
 * described by @info.
 *
 * If finalizing fails the skb is freed here and the error is returned;
 * otherwise the result of genlmsg_reply() is returned.
 */
static int send_reply(struct sk_buff *skb, struct genl_info *info)
{
	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
	int rc = genlmsg_end(skb, genlmsg_data(genlhdr));

	if (rc >= 0)
		return genlmsg_reply(skb, info);

	nlmsg_free(skb);
	return rc;
}
124 | |
125 | /* |
126 | * Send taskstats data in @skb to listeners registered for @cpu's exit data |
127 | */ |
128 | static void send_cpu_listeners(struct sk_buff *skb, |
129 | struct listener_list *listeners) |
130 | { |
131 | struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); |
132 | struct listener *s, *tmp; |
133 | struct sk_buff *skb_next, *skb_cur = skb; |
134 | void *reply = genlmsg_data(genlhdr); |
135 | int rc, delcount = 0; |
136 | |
137 | rc = genlmsg_end(skb, reply); |
138 | if (rc < 0) { |
139 | nlmsg_free(skb); |
140 | return; |
141 | } |
142 | |
143 | rc = 0; |
144 | down_read(&listeners->sem); |
145 | list_for_each_entry(s, &listeners->list, list) { |
146 | skb_next = NULL; |
147 | if (!list_is_last(&s->list, &listeners->list)) { |
148 | skb_next = skb_clone(skb_cur, GFP_KERNEL); |
149 | if (!skb_next) |
150 | break; |
151 | } |
152 | rc = genlmsg_unicast(&init_net, skb_cur, s->pid); |
153 | if (rc == -ECONNREFUSED) { |
154 | s->valid = 0; |
155 | delcount++; |
156 | } |
157 | skb_cur = skb_next; |
158 | } |
159 | up_read(&listeners->sem); |
160 | |
161 | if (skb_cur) |
162 | nlmsg_free(skb_cur); |
163 | |
164 | if (!delcount) |
165 | return; |
166 | |
167 | /* Delete invalidated entries */ |
168 | down_write(&listeners->sem); |
169 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { |
170 | if (!s->valid) { |
171 | list_del(&s->list); |
172 | kfree(s); |
173 | } |
174 | } |
175 | up_write(&listeners->sem); |
176 | } |
177 | |
178 | static int fill_pid(pid_t pid, struct task_struct *tsk, |
179 | struct taskstats *stats) |
180 | { |
181 | int rc = 0; |
182 | |
183 | if (!tsk) { |
184 | rcu_read_lock(); |
185 | tsk = find_task_by_vpid(pid); |
186 | if (tsk) |
187 | get_task_struct(tsk); |
188 | rcu_read_unlock(); |
189 | if (!tsk) |
190 | return -ESRCH; |
191 | } else |
192 | get_task_struct(tsk); |
193 | |
194 | memset(stats, 0, sizeof(*stats)); |
195 | /* |
196 | * Each accounting subsystem adds calls to its functions to |
197 | * fill in relevant parts of struct taskstsats as follows |
198 | * |
199 | * per-task-foo(stats, tsk); |
200 | */ |
201 | |
202 | delayacct_add_tsk(stats, tsk); |
203 | |
204 | /* fill in basic acct fields */ |
205 | stats->version = TASKSTATS_VERSION; |
206 | stats->nvcsw = tsk->nvcsw; |
207 | stats->nivcsw = tsk->nivcsw; |
208 | bacct_add_tsk(stats, tsk); |
209 | |
210 | /* fill in extended acct fields */ |
211 | xacct_add_tsk(stats, tsk); |
212 | |
213 | /* Define err: label here if needed */ |
214 | put_task_struct(tsk); |
215 | return rc; |
216 | |
217 | } |
218 | |
219 | static int fill_tgid(pid_t tgid, struct task_struct *first, |
220 | struct taskstats *stats) |
221 | { |
222 | struct task_struct *tsk; |
223 | unsigned long flags; |
224 | int rc = -ESRCH; |
225 | |
226 | /* |
227 | * Add additional stats from live tasks except zombie thread group |
228 | * leaders who are already counted with the dead tasks |
229 | */ |
230 | rcu_read_lock(); |
231 | if (!first) |
232 | first = find_task_by_vpid(tgid); |
233 | |
234 | if (!first || !lock_task_sighand(first, &flags)) |
235 | goto out; |
236 | |
237 | if (first->signal->stats) |
238 | memcpy(stats, first->signal->stats, sizeof(*stats)); |
239 | else |
240 | memset(stats, 0, sizeof(*stats)); |
241 | |
242 | tsk = first; |
243 | do { |
244 | if (tsk->exit_state) |
245 | continue; |
246 | /* |
247 | * Accounting subsystem can call its functions here to |
248 | * fill in relevant parts of struct taskstsats as follows |
249 | * |
250 | * per-task-foo(stats, tsk); |
251 | */ |
252 | delayacct_add_tsk(stats, tsk); |
253 | |
254 | stats->nvcsw += tsk->nvcsw; |
255 | stats->nivcsw += tsk->nivcsw; |
256 | } while_each_thread(first, tsk); |
257 | |
258 | unlock_task_sighand(first, &flags); |
259 | rc = 0; |
260 | out: |
261 | rcu_read_unlock(); |
262 | |
263 | stats->version = TASKSTATS_VERSION; |
264 | /* |
265 | * Accounting subsystems can also add calls here to modify |
266 | * fields of taskstats. |
267 | */ |
268 | return rc; |
269 | } |
270 | |
271 | |
272 | static void fill_tgid_exit(struct task_struct *tsk) |
273 | { |
274 | unsigned long flags; |
275 | |
276 | spin_lock_irqsave(&tsk->sighand->siglock, flags); |
277 | if (!tsk->signal->stats) |
278 | goto ret; |
279 | |
280 | /* |
281 | * Each accounting subsystem calls its functions here to |
282 | * accumalate its per-task stats for tsk, into the per-tgid structure |
283 | * |
284 | * per-task-foo(tsk->signal->stats, tsk); |
285 | */ |
286 | delayacct_add_tsk(tsk->signal->stats, tsk); |
287 | ret: |
288 | spin_unlock_irqrestore(&tsk->sighand->siglock, flags); |
289 | return; |
290 | } |
291 | |
292 | static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) |
293 | { |
294 | struct listener_list *listeners; |
295 | struct listener *s, *tmp; |
296 | unsigned int cpu; |
297 | |
298 | if (!cpumask_subset(mask, cpu_possible_mask)) |
299 | return -EINVAL; |
300 | |
301 | if (isadd == REGISTER) { |
302 | for_each_cpu(cpu, mask) { |
303 | s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, |
304 | cpu_to_node(cpu)); |
305 | if (!s) |
306 | goto cleanup; |
307 | s->pid = pid; |
308 | INIT_LIST_HEAD(&s->list); |
309 | s->valid = 1; |
310 | |
311 | listeners = &per_cpu(listener_array, cpu); |
312 | down_write(&listeners->sem); |
313 | list_add(&s->list, &listeners->list); |
314 | up_write(&listeners->sem); |
315 | } |
316 | return 0; |
317 | } |
318 | |
319 | /* Deregister or cleanup */ |
320 | cleanup: |
321 | for_each_cpu(cpu, mask) { |
322 | listeners = &per_cpu(listener_array, cpu); |
323 | down_write(&listeners->sem); |
324 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { |
325 | if (s->pid == pid) { |
326 | list_del(&s->list); |
327 | kfree(s); |
328 | break; |
329 | } |
330 | } |
331 | up_write(&listeners->sem); |
332 | } |
333 | return 0; |
334 | } |
335 | |
336 | static int parse(struct nlattr *na, struct cpumask *mask) |
337 | { |
338 | char *data; |
339 | int len; |
340 | int ret; |
341 | |
342 | if (na == NULL) |
343 | return 1; |
344 | len = nla_len(na); |
345 | if (len > TASKSTATS_CPUMASK_MAXLEN) |
346 | return -E2BIG; |
347 | if (len < 1) |
348 | return -EINVAL; |
349 | data = kmalloc(len, GFP_KERNEL); |
350 | if (!data) |
351 | return -ENOMEM; |
352 | nla_strlcpy(data, na, len); |
353 | ret = cpulist_parse(data, mask); |
354 | kfree(data); |
355 | return ret; |
356 | } |
357 | |
358 | static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) |
359 | { |
360 | struct nlattr *na, *ret; |
361 | int aggr; |
362 | |
363 | aggr = (type == TASKSTATS_TYPE_PID) |
364 | ? TASKSTATS_TYPE_AGGR_PID |
365 | : TASKSTATS_TYPE_AGGR_TGID; |
366 | |
367 | na = nla_nest_start(skb, aggr); |
368 | if (!na) |
369 | goto err; |
370 | if (nla_put(skb, type, sizeof(pid), &pid) < 0) |
371 | goto err; |
372 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); |
373 | if (!ret) |
374 | goto err; |
375 | nla_nest_end(skb, na); |
376 | |
377 | return nla_data(ret); |
378 | err: |
379 | return NULL; |
380 | } |
381 | |
382 | static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) |
383 | { |
384 | int rc = 0; |
385 | struct sk_buff *rep_skb; |
386 | struct cgroupstats *stats; |
387 | struct nlattr *na; |
388 | size_t size; |
389 | u32 fd; |
390 | struct file *file; |
391 | int fput_needed; |
392 | |
393 | na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; |
394 | if (!na) |
395 | return -EINVAL; |
396 | |
397 | fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); |
398 | file = fget_light(fd, &fput_needed); |
399 | if (!file) |
400 | return 0; |
401 | |
402 | size = nla_total_size(sizeof(struct cgroupstats)); |
403 | |
404 | rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, |
405 | size); |
406 | if (rc < 0) |
407 | goto err; |
408 | |
409 | na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, |
410 | sizeof(struct cgroupstats)); |
411 | stats = nla_data(na); |
412 | memset(stats, 0, sizeof(*stats)); |
413 | |
414 | rc = cgroupstats_build(stats, file->f_dentry); |
415 | if (rc < 0) { |
416 | nlmsg_free(rep_skb); |
417 | goto err; |
418 | } |
419 | |
420 | rc = send_reply(rep_skb, info); |
421 | |
422 | err: |
423 | fput_light(file, fput_needed); |
424 | return rc; |
425 | } |
426 | |
427 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) |
428 | { |
429 | int rc; |
430 | struct sk_buff *rep_skb; |
431 | struct taskstats *stats; |
432 | size_t size; |
433 | cpumask_var_t mask; |
434 | |
435 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) |
436 | return -ENOMEM; |
437 | |
438 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); |
439 | if (rc < 0) |
440 | goto free_return_rc; |
441 | if (rc == 0) { |
442 | rc = add_del_listener(info->snd_pid, mask, REGISTER); |
443 | goto free_return_rc; |
444 | } |
445 | |
446 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); |
447 | if (rc < 0) |
448 | goto free_return_rc; |
449 | if (rc == 0) { |
450 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); |
451 | free_return_rc: |
452 | free_cpumask_var(mask); |
453 | return rc; |
454 | } |
455 | free_cpumask_var(mask); |
456 | |
457 | /* |
458 | * Size includes space for nested attributes |
459 | */ |
460 | size = nla_total_size(sizeof(u32)) + |
461 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
462 | |
463 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); |
464 | if (rc < 0) |
465 | return rc; |
466 | |
467 | rc = -EINVAL; |
468 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { |
469 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); |
470 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); |
471 | if (!stats) |
472 | goto err; |
473 | |
474 | rc = fill_pid(pid, NULL, stats); |
475 | if (rc < 0) |
476 | goto err; |
477 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { |
478 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); |
479 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); |
480 | if (!stats) |
481 | goto err; |
482 | |
483 | rc = fill_tgid(tgid, NULL, stats); |
484 | if (rc < 0) |
485 | goto err; |
486 | } else |
487 | goto err; |
488 | |
489 | return send_reply(rep_skb, info); |
490 | err: |
491 | nlmsg_free(rep_skb); |
492 | return rc; |
493 | } |
494 | |
495 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) |
496 | { |
497 | struct signal_struct *sig = tsk->signal; |
498 | struct taskstats *stats; |
499 | |
500 | if (sig->stats || thread_group_empty(tsk)) |
501 | goto ret; |
502 | |
503 | /* No problem if kmem_cache_zalloc() fails */ |
504 | stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); |
505 | |
506 | spin_lock_irq(&tsk->sighand->siglock); |
507 | if (!sig->stats) { |
508 | sig->stats = stats; |
509 | stats = NULL; |
510 | } |
511 | spin_unlock_irq(&tsk->sighand->siglock); |
512 | |
513 | if (stats) |
514 | kmem_cache_free(taskstats_cache, stats); |
515 | ret: |
516 | return sig->stats; |
517 | } |
518 | |
519 | /* Send pid data out on exit */ |
520 | void taskstats_exit(struct task_struct *tsk, int group_dead) |
521 | { |
522 | int rc; |
523 | struct listener_list *listeners; |
524 | struct taskstats *stats; |
525 | struct sk_buff *rep_skb; |
526 | size_t size; |
527 | int is_thread_group; |
528 | |
529 | if (!family_registered) |
530 | return; |
531 | |
532 | /* |
533 | * Size includes space for nested attributes |
534 | */ |
535 | size = nla_total_size(sizeof(u32)) + |
536 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
537 | |
538 | is_thread_group = !!taskstats_tgid_alloc(tsk); |
539 | if (is_thread_group) { |
540 | /* PID + STATS + TGID + STATS */ |
541 | size = 2 * size; |
542 | /* fill the tsk->signal->stats structure */ |
543 | fill_tgid_exit(tsk); |
544 | } |
545 | |
546 | listeners = &__raw_get_cpu_var(listener_array); |
547 | if (list_empty(&listeners->list)) |
548 | return; |
549 | |
550 | rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size); |
551 | if (rc < 0) |
552 | return; |
553 | |
554 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid); |
555 | if (!stats) |
556 | goto err; |
557 | |
558 | rc = fill_pid(-1, tsk, stats); |
559 | if (rc < 0) |
560 | goto err; |
561 | |
562 | /* |
563 | * Doesn't matter if tsk is the leader or the last group member leaving |
564 | */ |
565 | if (!is_thread_group || !group_dead) |
566 | goto send; |
567 | |
568 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid); |
569 | if (!stats) |
570 | goto err; |
571 | |
572 | memcpy(stats, tsk->signal->stats, sizeof(*stats)); |
573 | |
574 | send: |
575 | send_cpu_listeners(rep_skb, listeners); |
576 | return; |
577 | err: |
578 | nlmsg_free(rep_skb); |
579 | } |
580 | |
581 | static struct genl_ops taskstats_ops = { |
582 | .cmd = TASKSTATS_CMD_GET, |
583 | .doit = taskstats_user_cmd, |
584 | .policy = taskstats_cmd_get_policy, |
585 | }; |
586 | |
587 | static struct genl_ops cgroupstats_ops = { |
588 | .cmd = CGROUPSTATS_CMD_GET, |
589 | .doit = cgroupstats_user_cmd, |
590 | .policy = cgroupstats_cmd_get_policy, |
591 | }; |
592 | |
593 | /* Needed early in initialization */ |
594 | void __init taskstats_init_early(void) |
595 | { |
596 | unsigned int i; |
597 | |
598 | taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC); |
599 | for_each_possible_cpu(i) { |
600 | INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); |
601 | init_rwsem(&(per_cpu(listener_array, i).sem)); |
602 | } |
603 | } |
604 | |
605 | static int __init taskstats_init(void) |
606 | { |
607 | int rc; |
608 | |
609 | rc = genl_register_family(&family); |
610 | if (rc) |
611 | return rc; |
612 | |
613 | rc = genl_register_ops(&family, &taskstats_ops); |
614 | if (rc < 0) |
615 | goto err; |
616 | |
617 | rc = genl_register_ops(&family, &cgroupstats_ops); |
618 | if (rc < 0) |
619 | goto err_cgroup_ops; |
620 | |
621 | family_registered = 1; |
622 | printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); |
623 | return 0; |
624 | err_cgroup_ops: |
625 | genl_unregister_ops(&family, &taskstats_ops); |
626 | err: |
627 | genl_unregister_family(&family); |
628 | return rc; |
629 | } |
630 | |
631 | /* |
632 | * late initcall ensures initialization of statistics collection |
633 | * mechanisms precedes initialization of the taskstats interface |
634 | */ |
635 | late_initcall(taskstats_init); |
636 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9