Root/kernel/tracepoint.c

1/*
2 * Copyright (C) 2008 Mathieu Desnoyers
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18#include <linux/module.h>
19#include <linux/mutex.h>
20#include <linux/types.h>
21#include <linux/jhash.h>
22#include <linux/list.h>
23#include <linux/rcupdate.h>
24#include <linux/tracepoint.h>
25#include <linux/err.h>
26#include <linux/slab.h>
27#include <linux/sched.h>
28
29extern struct tracepoint __start___tracepoints[];
30extern struct tracepoint __stop___tracepoints[];
31
32/* Set to 1 to enable tracepoint debug output */
33static const int tracepoint_debug;
34
35/*
36 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
37 * builtin and module tracepoints and the hash table.
38 */
39static DEFINE_MUTEX(tracepoints_mutex);
40
41/*
42 * Tracepoint hash table, containing the active tracepoints.
43 * Protected by tracepoints_mutex.
44 */
45#define TRACEPOINT_HASH_BITS 6
46#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
47static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
48
49/*
50 * Note about RCU :
51 * It is used to delay the free of multiple probes array until a quiescent
52 * state is reached.
53 * Tracepoint entries modifications are protected by the tracepoints_mutex.
54 */
55struct tracepoint_entry {
56    struct hlist_node hlist;
57    void **funcs;
58    int refcount; /* Number of times armed. 0 if disarmed. */
59    char name[0];
60};
61
62struct tp_probes {
63    union {
64        struct rcu_head rcu;
65        struct list_head list;
66    } u;
67    void *probes[0];
68};
69
70static inline void *allocate_probes(int count)
71{
72    struct tp_probes *p = kmalloc(count * sizeof(void *)
73            + sizeof(struct tp_probes), GFP_KERNEL);
74    return p == NULL ? NULL : p->probes;
75}
76
77static void rcu_free_old_probes(struct rcu_head *head)
78{
79    kfree(container_of(head, struct tp_probes, u.rcu));
80}
81
82static inline void release_probes(void *old)
83{
84    if (old) {
85        struct tp_probes *tp_probes = container_of(old,
86            struct tp_probes, probes[0]);
87        call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
88    }
89}
90
91static void debug_print_probes(struct tracepoint_entry *entry)
92{
93    int i;
94
95    if (!tracepoint_debug || !entry->funcs)
96        return;
97
98    for (i = 0; entry->funcs[i]; i++)
99        printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
100}
101
102static void *
103tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
104{
105    int nr_probes = 0;
106    void **old, **new;
107
108    WARN_ON(!probe);
109
110    debug_print_probes(entry);
111    old = entry->funcs;
112    if (old) {
113        /* (N -> N+1), (N != 0, 1) probes */
114        for (nr_probes = 0; old[nr_probes]; nr_probes++)
115            if (old[nr_probes] == probe)
116                return ERR_PTR(-EEXIST);
117    }
118    /* + 2 : one for new probe, one for NULL func */
119    new = allocate_probes(nr_probes + 2);
120    if (new == NULL)
121        return ERR_PTR(-ENOMEM);
122    if (old)
123        memcpy(new, old, nr_probes * sizeof(void *));
124    new[nr_probes] = probe;
125    new[nr_probes + 1] = NULL;
126    entry->refcount = nr_probes + 1;
127    entry->funcs = new;
128    debug_print_probes(entry);
129    return old;
130}
131
132static void *
133tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
134{
135    int nr_probes = 0, nr_del = 0, i;
136    void **old, **new;
137
138    old = entry->funcs;
139
140    if (!old)
141        return ERR_PTR(-ENOENT);
142
143    debug_print_probes(entry);
144    /* (N -> M), (N > 1, M >= 0) probes */
145    for (nr_probes = 0; old[nr_probes]; nr_probes++) {
146        if ((!probe || old[nr_probes] == probe))
147            nr_del++;
148    }
149
150    if (nr_probes - nr_del == 0) {
151        /* N -> 0, (N > 1) */
152        entry->funcs = NULL;
153        entry->refcount = 0;
154        debug_print_probes(entry);
155        return old;
156    } else {
157        int j = 0;
158        /* N -> M, (N > 1, M > 0) */
159        /* + 1 for NULL */
160        new = allocate_probes(nr_probes - nr_del + 1);
161        if (new == NULL)
162            return ERR_PTR(-ENOMEM);
163        for (i = 0; old[i]; i++)
164            if ((probe && old[i] != probe))
165                new[j++] = old[i];
166        new[nr_probes - nr_del] = NULL;
167        entry->refcount = nr_probes - nr_del;
168        entry->funcs = new;
169    }
170    debug_print_probes(entry);
171    return old;
172}
173
174/*
175 * Get tracepoint if the tracepoint is present in the tracepoint hash table.
176 * Must be called with tracepoints_mutex held.
177 * Returns NULL if not present.
178 */
179static struct tracepoint_entry *get_tracepoint(const char *name)
180{
181    struct hlist_head *head;
182    struct hlist_node *node;
183    struct tracepoint_entry *e;
184    u32 hash = jhash(name, strlen(name), 0);
185
186    head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
187    hlist_for_each_entry(e, node, head, hlist) {
188        if (!strcmp(name, e->name))
189            return e;
190    }
191    return NULL;
192}
193
194/*
195 * Add the tracepoint to the tracepoint hash table. Must be called with
196 * tracepoints_mutex held.
197 */
198static struct tracepoint_entry *add_tracepoint(const char *name)
199{
200    struct hlist_head *head;
201    struct hlist_node *node;
202    struct tracepoint_entry *e;
203    size_t name_len = strlen(name) + 1;
204    u32 hash = jhash(name, name_len-1, 0);
205
206    head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
207    hlist_for_each_entry(e, node, head, hlist) {
208        if (!strcmp(name, e->name)) {
209            printk(KERN_NOTICE
210                "tracepoint %s busy\n", name);
211            return ERR_PTR(-EEXIST); /* Already there */
212        }
213    }
214    /*
215     * Using kmalloc here to allocate a variable length element. Could
216     * cause some memory fragmentation if overused.
217     */
218    e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
219    if (!e)
220        return ERR_PTR(-ENOMEM);
221    memcpy(&e->name[0], name, name_len);
222    e->funcs = NULL;
223    e->refcount = 0;
224    hlist_add_head(&e->hlist, head);
225    return e;
226}
227
228/*
229 * Remove the tracepoint from the tracepoint hash table. Must be called with
230 * mutex_lock held.
231 */
232static inline void remove_tracepoint(struct tracepoint_entry *e)
233{
234    hlist_del(&e->hlist);
235    kfree(e);
236}
237
238/*
239 * Sets the probe callback corresponding to one tracepoint.
240 */
241static void set_tracepoint(struct tracepoint_entry **entry,
242    struct tracepoint *elem, int active)
243{
244    WARN_ON(strcmp((*entry)->name, elem->name) != 0);
245
246    if (elem->regfunc && !elem->state && active)
247        elem->regfunc();
248    else if (elem->unregfunc && elem->state && !active)
249        elem->unregfunc();
250
251    /*
252     * rcu_assign_pointer has a smp_wmb() which makes sure that the new
253     * probe callbacks array is consistent before setting a pointer to it.
254     * This array is referenced by __DO_TRACE from
255     * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
256     * is used.
257     */
258    rcu_assign_pointer(elem->funcs, (*entry)->funcs);
259    elem->state = active;
260}
261
262/*
263 * Disable a tracepoint and its probe callback.
264 * Note: only waiting an RCU period after setting elem->call to the empty
265 * function insures that the original callback is not used anymore. This insured
266 * by preempt_disable around the call site.
267 */
268static void disable_tracepoint(struct tracepoint *elem)
269{
270    if (elem->unregfunc && elem->state)
271        elem->unregfunc();
272
273    elem->state = 0;
274    rcu_assign_pointer(elem->funcs, NULL);
275}
276
277/**
278 * tracepoint_update_probe_range - Update a probe range
279 * @begin: beginning of the range
280 * @end: end of the range
281 *
282 * Updates the probe callback corresponding to a range of tracepoints.
283 */
284void
285tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)
286{
287    struct tracepoint *iter;
288    struct tracepoint_entry *mark_entry;
289
290    if (!begin)
291        return;
292
293    mutex_lock(&tracepoints_mutex);
294    for (iter = begin; iter < end; iter++) {
295        mark_entry = get_tracepoint(iter->name);
296        if (mark_entry) {
297            set_tracepoint(&mark_entry, iter,
298                    !!mark_entry->refcount);
299        } else {
300            disable_tracepoint(iter);
301        }
302    }
303    mutex_unlock(&tracepoints_mutex);
304}
305
306/*
307 * Update probes, removing the faulty probes.
308 */
309static void tracepoint_update_probes(void)
310{
311    /* Core kernel tracepoints */
312    tracepoint_update_probe_range(__start___tracepoints,
313        __stop___tracepoints);
314    /* tracepoints in modules. */
315    module_update_tracepoints();
316}
317
318static void *tracepoint_add_probe(const char *name, void *probe)
319{
320    struct tracepoint_entry *entry;
321    void *old;
322
323    entry = get_tracepoint(name);
324    if (!entry) {
325        entry = add_tracepoint(name);
326        if (IS_ERR(entry))
327            return entry;
328    }
329    old = tracepoint_entry_add_probe(entry, probe);
330    if (IS_ERR(old) && !entry->refcount)
331        remove_tracepoint(entry);
332    return old;
333}
334
335/**
336 * tracepoint_probe_register - Connect a probe to a tracepoint
337 * @name: tracepoint name
338 * @probe: probe handler
339 *
340 * Returns 0 if ok, error value on error.
341 * The probe address must at least be aligned on the architecture pointer size.
342 */
343int tracepoint_probe_register(const char *name, void *probe)
344{
345    void *old;
346
347    mutex_lock(&tracepoints_mutex);
348    old = tracepoint_add_probe(name, probe);
349    mutex_unlock(&tracepoints_mutex);
350    if (IS_ERR(old))
351        return PTR_ERR(old);
352
353    tracepoint_update_probes(); /* may update entry */
354    release_probes(old);
355    return 0;
356}
357EXPORT_SYMBOL_GPL(tracepoint_probe_register);
358
359static void *tracepoint_remove_probe(const char *name, void *probe)
360{
361    struct tracepoint_entry *entry;
362    void *old;
363
364    entry = get_tracepoint(name);
365    if (!entry)
366        return ERR_PTR(-ENOENT);
367    old = tracepoint_entry_remove_probe(entry, probe);
368    if (IS_ERR(old))
369        return old;
370    if (!entry->refcount)
371        remove_tracepoint(entry);
372    return old;
373}
374
375/**
376 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
377 * @name: tracepoint name
378 * @probe: probe function pointer
379 *
380 * We do not need to call a synchronize_sched to make sure the probes have
381 * finished running before doing a module unload, because the module unload
382 * itself uses stop_machine(), which insures that every preempt disabled section
383 * have finished.
384 */
385int tracepoint_probe_unregister(const char *name, void *probe)
386{
387    void *old;
388
389    mutex_lock(&tracepoints_mutex);
390    old = tracepoint_remove_probe(name, probe);
391    mutex_unlock(&tracepoints_mutex);
392    if (IS_ERR(old))
393        return PTR_ERR(old);
394
395    tracepoint_update_probes(); /* may update entry */
396    release_probes(old);
397    return 0;
398}
399EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
400
401static LIST_HEAD(old_probes);
402static int need_update;
403
404static void tracepoint_add_old_probes(void *old)
405{
406    need_update = 1;
407    if (old) {
408        struct tp_probes *tp_probes = container_of(old,
409            struct tp_probes, probes[0]);
410        list_add(&tp_probes->u.list, &old_probes);
411    }
412}
413
414/**
415 * tracepoint_probe_register_noupdate - register a probe but not connect
416 * @name: tracepoint name
417 * @probe: probe handler
418 *
419 * caller must call tracepoint_probe_update_all()
420 */
421int tracepoint_probe_register_noupdate(const char *name, void *probe)
422{
423    void *old;
424
425    mutex_lock(&tracepoints_mutex);
426    old = tracepoint_add_probe(name, probe);
427    if (IS_ERR(old)) {
428        mutex_unlock(&tracepoints_mutex);
429        return PTR_ERR(old);
430    }
431    tracepoint_add_old_probes(old);
432    mutex_unlock(&tracepoints_mutex);
433    return 0;
434}
435EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
436
437/**
438 * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect
439 * @name: tracepoint name
440 * @probe: probe function pointer
441 *
442 * caller must call tracepoint_probe_update_all()
443 */
444int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
445{
446    void *old;
447
448    mutex_lock(&tracepoints_mutex);
449    old = tracepoint_remove_probe(name, probe);
450    if (IS_ERR(old)) {
451        mutex_unlock(&tracepoints_mutex);
452        return PTR_ERR(old);
453    }
454    tracepoint_add_old_probes(old);
455    mutex_unlock(&tracepoints_mutex);
456    return 0;
457}
458EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
459
460/**
461 * tracepoint_probe_update_all - update tracepoints
462 */
463void tracepoint_probe_update_all(void)
464{
465    LIST_HEAD(release_probes);
466    struct tp_probes *pos, *next;
467
468    mutex_lock(&tracepoints_mutex);
469    if (!need_update) {
470        mutex_unlock(&tracepoints_mutex);
471        return;
472    }
473    if (!list_empty(&old_probes))
474        list_replace_init(&old_probes, &release_probes);
475    need_update = 0;
476    mutex_unlock(&tracepoints_mutex);
477
478    tracepoint_update_probes();
479    list_for_each_entry_safe(pos, next, &release_probes, u.list) {
480        list_del(&pos->u.list);
481        call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
482    }
483}
484EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
485
/**
 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
 * @tracepoint: current tracepoints (in), next tracepoint (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns whether a next tracepoint has been found (1) or not (0).
 * Will return the first tracepoint in the range if the input tracepoint is
 * NULL.
 */
int tracepoint_get_iter_range(struct tracepoint **tracepoint,
	struct tracepoint *begin, struct tracepoint *end)
{
	struct tracepoint *cur = *tracepoint;

	/* No current position: start at @begin unless the range is empty. */
	if (cur == NULL && begin != end) {
		*tracepoint = begin;
		return 1;
	}
	/* Otherwise the current position is valid only inside [begin, end). */
	return (cur >= begin && cur < end) ? 1 : 0;
}
EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
508
509static void tracepoint_get_iter(struct tracepoint_iter *iter)
510{
511    int found = 0;
512
513    /* Core kernel tracepoints */
514    if (!iter->module) {
515        found = tracepoint_get_iter_range(&iter->tracepoint,
516                __start___tracepoints, __stop___tracepoints);
517        if (found)
518            goto end;
519    }
520    /* tracepoints in modules. */
521    found = module_get_iter_tracepoints(iter);
522end:
523    if (!found)
524        tracepoint_iter_reset(iter);
525}
526
/*
 * Start (or resume) an iteration: make @iter point at a valid tracepoint,
 * or reset it if there is none.
 */
void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);
532
533void tracepoint_iter_next(struct tracepoint_iter *iter)
534{
535    iter->tracepoint++;
536    /*
537     * iter->tracepoint may be invalid because we blindly incremented it.
538     * Make sure it is valid by marshalling on the tracepoints, getting the
539     * tracepoints from following modules if necessary.
540     */
541    tracepoint_get_iter(iter);
542}
543EXPORT_SYMBOL_GPL(tracepoint_iter_next);
544
/* End an iteration. There is currently nothing to do here. */
void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
	/* Intentionally empty. */
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
549
550void tracepoint_iter_reset(struct tracepoint_iter *iter)
551{
552    iter->module = NULL;
553    iter->tracepoint = NULL;
554}
555EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
556
557#ifdef CONFIG_MODULES
558
559int tracepoint_module_notify(struct notifier_block *self,
560                 unsigned long val, void *data)
561{
562    struct module *mod = data;
563
564    switch (val) {
565    case MODULE_STATE_COMING:
566    case MODULE_STATE_GOING:
567        tracepoint_update_probe_range(mod->tracepoints,
568            mod->tracepoints + mod->num_tracepoints);
569        break;
570    }
571    return 0;
572}
573
574struct notifier_block tracepoint_module_nb = {
575    .notifier_call = tracepoint_module_notify,
576    .priority = 0,
577};
578
579static int init_tracepoints(void)
580{
581    return register_module_notifier(&tracepoint_module_nb);
582}
583__initcall(init_tracepoints);
584
585#endif /* CONFIG_MODULES */
586
587#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
588
589/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
590static int sys_tracepoint_refcount;
591
592void syscall_regfunc(void)
593{
594    unsigned long flags;
595    struct task_struct *g, *t;
596
597    if (!sys_tracepoint_refcount) {
598        read_lock_irqsave(&tasklist_lock, flags);
599        do_each_thread(g, t) {
600            /* Skip kernel threads. */
601            if (t->mm)
602                set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
603        } while_each_thread(g, t);
604        read_unlock_irqrestore(&tasklist_lock, flags);
605    }
606    sys_tracepoint_refcount++;
607}
608
609void syscall_unregfunc(void)
610{
611    unsigned long flags;
612    struct task_struct *g, *t;
613
614    sys_tracepoint_refcount--;
615    if (!sys_tracepoint_refcount) {
616        read_lock_irqsave(&tasklist_lock, flags);
617        do_each_thread(g, t) {
618            clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
619        } while_each_thread(g, t);
620        read_unlock_irqrestore(&tasklist_lock, flags);
621    }
622}
623#endif
624

Archive Download this file



interactive