kernel/trace/trace_kprobe.c

1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <asm/bitsperlong.h>
35
36#include "trace.h"
37#include "trace_output.h"
38
39#define MAX_TRACE_ARGS 128
40#define MAX_ARGSTR_LEN 63
41#define MAX_EVENT_NAME_LEN 64
42#define MAX_STRING_SIZE PATH_MAX
43#define KPROBE_EVENT_SYSTEM "kprobes"
44
45/* Reserved field names */
46#define FIELD_STRING_IP "__probe_ip"
47#define FIELD_STRING_RETIP "__probe_ret_ip"
48#define FIELD_STRING_FUNC "__probe_func"
49
50const char *reserved_field_names[] = {
51    "common_type",
52    "common_flags",
53    "common_preempt_count",
54    "common_pid",
55    "common_tgid",
56    FIELD_STRING_IP,
57    FIELD_STRING_RETIP,
58    FIELD_STRING_FUNC,
59};
60
61/* Printing function type */
62typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
63                 void *);
64#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
65#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
66
67/* Printing in basic type function template */
68#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
69static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
70                        const char *name, \
71                        void *data, void *ent)\
72{ \
73    return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
74} \
75static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
76
77DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
78DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
80DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
82DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
83DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
84DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
85
86/* data_rloc: data relative location, compatible with u32 */
87#define make_data_rloc(len, roffs) \
88    (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
89#define get_rloc_len(dl) ((u32)(dl) >> 16)
90#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
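/*
 * For illustration: make_data_rloc(5, 12) packs length 5 into the upper
 * 16 bits and offset 12 into the lower 16 bits, so get_rloc_len() yields 5
 * and get_rloc_offs() yields 12.
 */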
91
92static inline void *get_rloc_data(u32 *dl)
93{
94    return (u8 *)dl + get_rloc_offs(*dl);
95}
96
97/* For data_loc conversion */
98static inline void *get_loc_data(u32 *dl, void *ent)
99{
100    return (u8 *)ent + get_rloc_offs(*dl);
101}
102
103/*
104 * Convert data_rloc to data_loc:
105 * data_rloc stores the offset from data_rloc itself, but data_loc
106 * stores the offset from event entry.
107 */
108#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
109
110/* For defining macros, define string/string_size types */
111typedef u32 string;
112typedef u32 string_size;
113
114/* Print type function for string type */
115static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
116                          const char *name,
117                          void *data, void *ent)
118{
119    int len = *(u32 *)data >> 16;
120
121    if (!len)
122        return trace_seq_printf(s, " %s=(fault)", name);
123    else
124        return trace_seq_printf(s, " %s=\"%s\"", name,
125                    (const char *)get_loc_data(data, ent));
126}
127static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
128
129/* Data fetch function type */
130typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
131
132struct fetch_param {
133    fetch_func_t fn;
134    void *data;
135};
136
137static __kprobes void call_fetch(struct fetch_param *fprm,
138                 struct pt_regs *regs, void *dest)
139{
140    return fprm->fn(regs, fprm->data, dest);
141}
142
143#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
144/*
145 * Define macro for basic types - we don't need to define s* types, because
146 * we have to care only about bitwidth at recording time.
147 */
148#define DEFINE_BASIC_FETCH_FUNCS(method) \
149DEFINE_FETCH_##method(u8) \
150DEFINE_FETCH_##method(u16) \
151DEFINE_FETCH_##method(u32) \
152DEFINE_FETCH_##method(u64)
153
154#define CHECK_FETCH_FUNCS(method, fn) \
155    (((FETCH_FUNC_NAME(method, u8) == fn) || \
156      (FETCH_FUNC_NAME(method, u16) == fn) || \
157      (FETCH_FUNC_NAME(method, u32) == fn) || \
158      (FETCH_FUNC_NAME(method, u64) == fn) || \
159      (FETCH_FUNC_NAME(method, string) == fn) || \
160      (FETCH_FUNC_NAME(method, string_size) == fn)) \
161     && (fn != NULL))
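/*
 * For illustration: CHECK_FETCH_FUNCS(deref, fn) is true only if fn is one
 * of the fetch_deref_* helpers defined below; the free_* routines use this
 * to tell what kind of private data a fetch_param carries.
 */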
162
163/* Data fetch function templates */
164#define DEFINE_FETCH_reg(type) \
165static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
166                    void *offset, void *dest) \
167{ \
168    *(type *)dest = (type)regs_get_register(regs, \
169                (unsigned int)((unsigned long)offset)); \
170}
171DEFINE_BASIC_FETCH_FUNCS(reg)
172/* No string on the register */
173#define fetch_reg_string NULL
174#define fetch_reg_string_size NULL
175
176#define DEFINE_FETCH_stack(type) \
177static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
178                      void *offset, void *dest) \
179{ \
180    *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
181                (unsigned int)((unsigned long)offset)); \
182}
183DEFINE_BASIC_FETCH_FUNCS(stack)
184/* No string on the stack entry */
185#define fetch_stack_string NULL
186#define fetch_stack_string_size NULL
187
188#define DEFINE_FETCH_retval(type) \
189static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
190                      void *dummy, void *dest) \
191{ \
192    *(type *)dest = (type)regs_return_value(regs); \
193}
194DEFINE_BASIC_FETCH_FUNCS(retval)
195/* No string on the retval */
196#define fetch_retval_string NULL
197#define fetch_retval_string_size NULL
198
199#define DEFINE_FETCH_memory(type) \
200static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
201                      void *addr, void *dest) \
202{ \
203    type retval; \
204    if (probe_kernel_address(addr, retval)) \
205        *(type *)dest = 0; \
206    else \
207        *(type *)dest = retval; \
208}
209DEFINE_BASIC_FETCH_FUNCS(memory)
210/*
211 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
212 * length and relative data location.
213 */
214static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
215                              void *addr, void *dest)
216{
217    long ret;
218    int maxlen = get_rloc_len(*(u32 *)dest);
219    u8 *dst = get_rloc_data(dest);
220    u8 *src = addr;
221    mm_segment_t old_fs = get_fs();
222    if (!maxlen)
223        return;
224    /*
225     * Try to get string again, since the string can be changed while
226     * probing.
227     */
228    set_fs(KERNEL_DS);
229    pagefault_disable();
230    do
231        ret = __copy_from_user_inatomic(dst++, src++, 1);
232    while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
233    dst[-1] = '\0';
234    pagefault_enable();
235    set_fs(old_fs);
236
237    if (ret < 0) { /* Failed to fetch string */
238        ((u8 *)get_rloc_data(dest))[0] = '\0';
239        *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
240    } else
241        *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
242                          get_rloc_offs(*(u32 *)dest));
243}
/* Return the length of the string, including the terminating NUL byte */
245static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
246                            void *addr, void *dest)
247{
248    int ret, len = 0;
249    u8 c;
250    mm_segment_t old_fs = get_fs();
251
252    set_fs(KERNEL_DS);
253    pagefault_disable();
254    do {
255        ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
256        len++;
257    } while (c && ret == 0 && len < MAX_STRING_SIZE);
258    pagefault_enable();
259    set_fs(old_fs);
260
261    if (ret < 0) /* Failed to check the length */
262        *(u32 *)dest = 0;
263    else
264        *(u32 *)dest = len;
265}
266
267/* Memory fetching by symbol */
268struct symbol_cache {
269    char *symbol;
270    long offset;
271    unsigned long addr;
272};
273
274static unsigned long update_symbol_cache(struct symbol_cache *sc)
275{
276    sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
277    if (sc->addr)
278        sc->addr += sc->offset;
279    return sc->addr;
280}
281
282static void free_symbol_cache(struct symbol_cache *sc)
283{
284    kfree(sc->symbol);
285    kfree(sc);
286}
287
288static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
289{
290    struct symbol_cache *sc;
291
292    if (!sym || strlen(sym) == 0)
293        return NULL;
294    sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
295    if (!sc)
296        return NULL;
297
298    sc->symbol = kstrdup(sym, GFP_KERNEL);
299    if (!sc->symbol) {
300        kfree(sc);
301        return NULL;
302    }
303    sc->offset = offset;
304
305    update_symbol_cache(sc);
306    return sc;
307}
308
309#define DEFINE_FETCH_symbol(type) \
310static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
311                      void *data, void *dest) \
312{ \
313    struct symbol_cache *sc = data; \
314    if (sc->addr) \
315        fetch_memory_##type(regs, (void *)sc->addr, dest); \
316    else \
317        *(type *)dest = 0; \
318}
319DEFINE_BASIC_FETCH_FUNCS(symbol)
320DEFINE_FETCH_symbol(string)
321DEFINE_FETCH_symbol(string_size)
322
323/* Dereference memory access function */
324struct deref_fetch_param {
325    struct fetch_param orig;
326    long offset;
327};
328
329#define DEFINE_FETCH_deref(type) \
330static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
331                        void *data, void *dest) \
332{ \
333    struct deref_fetch_param *dprm = data; \
334    unsigned long addr; \
335    call_fetch(&dprm->orig, regs, &addr); \
336    if (addr) { \
337        addr += dprm->offset; \
338        fetch_memory_##type(regs, (void *)addr, dest); \
339    } else \
340        *(type *)dest = 0; \
341}
342DEFINE_BASIC_FETCH_FUNCS(deref)
343DEFINE_FETCH_deref(string)
344DEFINE_FETCH_deref(string_size)
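/*
 * Illustrative use (argument spec chosen as an example only): "+8(%di)" is
 * parsed into a deref_fetch_param whose orig fetches the register and whose
 * offset is 8; the deref fetch then reads memory at that address.
 */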
345
346static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
347{
348    if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
349        free_deref_fetch_param(data->orig.data);
350    else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
351        free_symbol_cache(data->orig.data);
352    kfree(data);
353}
354
355/* Bitfield fetch function */
356struct bitfield_fetch_param {
357    struct fetch_param orig;
358    unsigned char hi_shift;
359    unsigned char low_shift;
360};
361
362#define DEFINE_FETCH_bitfield(type) \
363static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
364                        void *data, void *dest) \
365{ \
366    struct bitfield_fetch_param *bprm = data; \
367    type buf = 0; \
368    call_fetch(&bprm->orig, regs, &buf); \
369    if (buf) { \
370        buf <<= bprm->hi_shift; \
371        buf >>= bprm->low_shift; \
372    } \
373    *(type *)dest = buf; \
374}
375DEFINE_BASIC_FETCH_FUNCS(bitfield)
376#define fetch_bitfield_string NULL
377#define fetch_bitfield_string_size NULL
378
379static __kprobes void
380free_bitfield_fetch_param(struct bitfield_fetch_param *data)
381{
382    /*
383     * Don't check the bitfield itself, because this must be the
384     * last fetch function.
385     */
386    if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
387        free_deref_fetch_param(data->orig.data);
388    else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
389        free_symbol_cache(data->orig.data);
390    kfree(data);
391}
392/* Default (unsigned long) fetch type */
393#define __DEFAULT_FETCH_TYPE(t) u##t
394#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
395#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
396#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
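/* i.e. "u32" on 32-bit kernels and "u64" on 64-bit kernels */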
397
398/* Fetch types */
399enum {
400    FETCH_MTD_reg = 0,
401    FETCH_MTD_stack,
402    FETCH_MTD_retval,
403    FETCH_MTD_memory,
404    FETCH_MTD_symbol,
405    FETCH_MTD_deref,
406    FETCH_MTD_bitfield,
407    FETCH_MTD_END,
408};
409
410#define ASSIGN_FETCH_FUNC(method, type) \
411    [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
412
413#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
414    {.name = _name, \
415     .size = _size, \
416     .is_signed = sign, \
417     .print = PRINT_TYPE_FUNC_NAME(ptype), \
418     .fmt = PRINT_TYPE_FMT_NAME(ptype), \
419     .fmttype = _fmttype, \
420     .fetch = { \
421ASSIGN_FETCH_FUNC(reg, ftype), \
422ASSIGN_FETCH_FUNC(stack, ftype), \
423ASSIGN_FETCH_FUNC(retval, ftype), \
424ASSIGN_FETCH_FUNC(memory, ftype), \
425ASSIGN_FETCH_FUNC(symbol, ftype), \
426ASSIGN_FETCH_FUNC(deref, ftype), \
427ASSIGN_FETCH_FUNC(bitfield, ftype), \
428      } \
429    }
430
431#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
432    __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
433
434#define FETCH_TYPE_STRING 0
435#define FETCH_TYPE_STRSIZE 1
436
437/* Fetch type information table */
438static const struct fetch_type {
439    const char *name; /* Name of type */
440    size_t size; /* Byte size of type */
441    int is_signed; /* Signed flag */
442    print_type_func_t print; /* Print functions */
    const char *fmt; /* Format string */
444    const char *fmttype; /* Name in format file */
445    /* Fetch functions */
446    fetch_func_t fetch[FETCH_MTD_END];
447} fetch_type_table[] = {
448    /* Special types */
449    [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
450                    sizeof(u32), 1, "__data_loc char[]"),
451    [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
452                    string_size, sizeof(u32), 0, "u32"),
453    /* Basic types */
454    ASSIGN_FETCH_TYPE(u8, u8, 0),
455    ASSIGN_FETCH_TYPE(u16, u16, 0),
456    ASSIGN_FETCH_TYPE(u32, u32, 0),
457    ASSIGN_FETCH_TYPE(u64, u64, 0),
458    ASSIGN_FETCH_TYPE(s8, u8, 1),
459    ASSIGN_FETCH_TYPE(s16, u16, 1),
460    ASSIGN_FETCH_TYPE(s32, u32, 1),
461    ASSIGN_FETCH_TYPE(s64, u64, 1),
462};
463
464static const struct fetch_type *find_fetch_type(const char *type)
465{
466    int i;
467
468    if (!type)
469        type = DEFAULT_FETCH_TYPE_STR;
470
471    /* Special case: bitfield */
472    if (*type == 'b') {
473        unsigned long bs;
474        type = strchr(type, '/');
475        if (!type)
476            goto fail;
477        type++;
478        if (strict_strtoul(type, 0, &bs))
479            goto fail;
480        switch (bs) {
481        case 8:
482            return find_fetch_type("u8");
483        case 16:
484            return find_fetch_type("u16");
485        case 32:
486            return find_fetch_type("u32");
487        case 64:
488            return find_fetch_type("u64");
489        default:
490            goto fail;
491        }
492    }
493
494    for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
495        if (strcmp(type, fetch_type_table[i].name) == 0)
496            return &fetch_type_table[i];
497fail:
498    return NULL;
499}
500
/* Special function: only accepts unsigned long */
502static __kprobes void fetch_stack_address(struct pt_regs *regs,
503                      void *dummy, void *dest)
504{
505    *(unsigned long *)dest = kernel_stack_pointer(regs);
506}
507
508static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
509                        fetch_func_t orig_fn)
510{
511    int i;
512
513    if (type != &fetch_type_table[FETCH_TYPE_STRING])
514        return NULL; /* Only string type needs size function */
515    for (i = 0; i < FETCH_MTD_END; i++)
516        if (type->fetch[i] == orig_fn)
517            return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
518
519    WARN_ON(1); /* This should not happen */
520    return NULL;
521}
522
523/**
524 * Kprobe event core functions
525 */
526
527struct probe_arg {
528    struct fetch_param fetch;
529    struct fetch_param fetch_size;
530    unsigned int offset; /* Offset from argument entry */
531    const char *name; /* Name of this argument */
532    const char *comm; /* Command of this argument */
533    const struct fetch_type *type; /* Type of this argument */
534};
535
536/* Flags for trace_probe */
537#define TP_FLAG_TRACE 1
538#define TP_FLAG_PROFILE 2
539
540struct trace_probe {
541    struct list_head list;
542    struct kretprobe rp; /* Use rp.kp for kprobe use */
543    unsigned long nhit;
544    unsigned int flags; /* For TP_FLAG_* */
545    const char *symbol; /* symbol name */
546    struct ftrace_event_class class;
547    struct ftrace_event_call call;
548    ssize_t size; /* trace entry size */
549    unsigned int nr_args;
550    struct probe_arg args[];
551};
552
553#define SIZEOF_TRACE_PROBE(n) \
554    (offsetof(struct trace_probe, args) + \
555    (sizeof(struct probe_arg) * (n)))
556
557
558static __kprobes int probe_is_return(struct trace_probe *tp)
559{
560    return tp->rp.handler != NULL;
561}
562
563static __kprobes const char *probe_symbol(struct trace_probe *tp)
564{
565    return tp->symbol ? tp->symbol : "unknown";
566}
567
568static int register_probe_event(struct trace_probe *tp);
569static void unregister_probe_event(struct trace_probe *tp);
570
571static DEFINE_MUTEX(probe_lock);
572static LIST_HEAD(probe_list);
573
574static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
575static int kretprobe_dispatcher(struct kretprobe_instance *ri,
576                struct pt_regs *regs);
577
/* Check whether the name is suitable for an event/group/field name */
579static int is_good_name(const char *name)
580{
581    if (!isalpha(*name) && *name != '_')
582        return 0;
583    while (*++name != '\0') {
584        if (!isalpha(*name) && !isdigit(*name) && *name != '_')
585            return 0;
586    }
587    return 1;
588}
589
590/*
591 * Allocate new trace_probe and initialize it (including kprobes).
592 */
593static struct trace_probe *alloc_trace_probe(const char *group,
594                         const char *event,
595                         void *addr,
596                         const char *symbol,
597                         unsigned long offs,
598                         int nargs, int is_return)
599{
600    struct trace_probe *tp;
601    int ret = -ENOMEM;
602
603    tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
604    if (!tp)
605        return ERR_PTR(ret);
606
607    if (symbol) {
608        tp->symbol = kstrdup(symbol, GFP_KERNEL);
609        if (!tp->symbol)
610            goto error;
611        tp->rp.kp.symbol_name = tp->symbol;
612        tp->rp.kp.offset = offs;
613    } else
614        tp->rp.kp.addr = addr;
615
616    if (is_return)
617        tp->rp.handler = kretprobe_dispatcher;
618    else
619        tp->rp.kp.pre_handler = kprobe_dispatcher;
620
621    if (!event || !is_good_name(event)) {
622        ret = -EINVAL;
623        goto error;
624    }
625
626    tp->call.class = &tp->class;
627    tp->call.name = kstrdup(event, GFP_KERNEL);
628    if (!tp->call.name)
629        goto error;
630
631    if (!group || !is_good_name(group)) {
632        ret = -EINVAL;
633        goto error;
634    }
635
636    tp->class.system = kstrdup(group, GFP_KERNEL);
637    if (!tp->class.system)
638        goto error;
639
640    INIT_LIST_HEAD(&tp->list);
641    return tp;
642error:
643    kfree(tp->call.name);
644    kfree(tp->symbol);
645    kfree(tp);
646    return ERR_PTR(ret);
647}
648
649static void free_probe_arg(struct probe_arg *arg)
650{
651    if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
652        free_bitfield_fetch_param(arg->fetch.data);
653    else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
654        free_deref_fetch_param(arg->fetch.data);
655    else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
656        free_symbol_cache(arg->fetch.data);
657    kfree(arg->name);
658    kfree(arg->comm);
659}
660
661static void free_trace_probe(struct trace_probe *tp)
662{
663    int i;
664
665    for (i = 0; i < tp->nr_args; i++)
666        free_probe_arg(&tp->args[i]);
667
668    kfree(tp->call.class->system);
669    kfree(tp->call.name);
670    kfree(tp->symbol);
671    kfree(tp);
672}
673
674static struct trace_probe *find_probe_event(const char *event,
675                        const char *group)
676{
677    struct trace_probe *tp;
678
679    list_for_each_entry(tp, &probe_list, list)
680        if (strcmp(tp->call.name, event) == 0 &&
681            strcmp(tp->call.class->system, group) == 0)
682            return tp;
683    return NULL;
684}
685
/* Unregister a trace_probe and probe_event: must be called with probe_lock held */
687static void unregister_trace_probe(struct trace_probe *tp)
688{
689    if (probe_is_return(tp))
690        unregister_kretprobe(&tp->rp);
691    else
692        unregister_kprobe(&tp->rp.kp);
693    list_del(&tp->list);
694    unregister_probe_event(tp);
695}
696
697/* Register a trace_probe and probe_event */
698static int register_trace_probe(struct trace_probe *tp)
699{
700    struct trace_probe *old_tp;
701    int ret;
702
703    mutex_lock(&probe_lock);
704
705    /* register as an event */
706    old_tp = find_probe_event(tp->call.name, tp->call.class->system);
707    if (old_tp) {
708        /* delete old event */
709        unregister_trace_probe(old_tp);
710        free_trace_probe(old_tp);
711    }
712    ret = register_probe_event(tp);
713    if (ret) {
714        pr_warning("Failed to register probe event(%d)\n", ret);
715        goto end;
716    }
717
718    tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
719    if (probe_is_return(tp))
720        ret = register_kretprobe(&tp->rp);
721    else
722        ret = register_kprobe(&tp->rp.kp);
723
724    if (ret) {
725        pr_warning("Could not insert probe(%d)\n", ret);
726        if (ret == -EILSEQ) {
727            pr_warning("Probing address(0x%p) is not an "
728                   "instruction boundary.\n",
729                   tp->rp.kp.addr);
730            ret = -EINVAL;
731        }
732        unregister_probe_event(tp);
733    } else
734        list_add_tail(&tp->list, &probe_list);
735end:
736    mutex_unlock(&probe_lock);
737    return ret;
738}
739
740/* Split symbol and offset. */
741static int split_symbol_offset(char *symbol, unsigned long *offset)
742{
743    char *tmp;
744    int ret;
745
746    if (!offset)
747        return -EINVAL;
748
749    tmp = strchr(symbol, '+');
750    if (tmp) {
        /* skip the sign because strict_strtoul() doesn't accept '+' */
752        ret = strict_strtoul(tmp + 1, 0, offset);
753        if (ret)
754            return ret;
755        *tmp = '\0';
756    } else
757        *offset = 0;
758    return 0;
759}
760
761#define PARAM_MAX_ARGS 16
762#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
763
764static int parse_probe_vars(char *arg, const struct fetch_type *t,
765                struct fetch_param *f, int is_return)
766{
767    int ret = 0;
768    unsigned long param;
769
770    if (strcmp(arg, "retval") == 0) {
771        if (is_return)
772            f->fn = t->fetch[FETCH_MTD_retval];
773        else
774            ret = -EINVAL;
775    } else if (strncmp(arg, "stack", 5) == 0) {
776        if (arg[5] == '\0') {
777            if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
778                f->fn = fetch_stack_address;
779            else
780                ret = -EINVAL;
781        } else if (isdigit(arg[5])) {
782            ret = strict_strtoul(arg + 5, 10, &param);
783            if (ret || param > PARAM_MAX_STACK)
784                ret = -EINVAL;
785            else {
786                f->fn = t->fetch[FETCH_MTD_stack];
787                f->data = (void *)param;
788            }
789        } else
790            ret = -EINVAL;
791    } else
792        ret = -EINVAL;
793    return ret;
794}
795
796/* Recursive argument parser */
797static int __parse_probe_arg(char *arg, const struct fetch_type *t,
798                 struct fetch_param *f, int is_return)
799{
800    int ret = 0;
801    unsigned long param;
802    long offset;
803    char *tmp;
804
805    switch (arg[0]) {
806    case '$':
807        ret = parse_probe_vars(arg + 1, t, f, is_return);
808        break;
809    case '%': /* named register */
810        ret = regs_query_register_offset(arg + 1);
811        if (ret >= 0) {
812            f->fn = t->fetch[FETCH_MTD_reg];
813            f->data = (void *)(unsigned long)ret;
814            ret = 0;
815        }
816        break;
817    case '@': /* memory or symbol */
818        if (isdigit(arg[1])) {
819            ret = strict_strtoul(arg + 1, 0, &param);
820            if (ret)
821                break;
822            f->fn = t->fetch[FETCH_MTD_memory];
823            f->data = (void *)param;
824        } else {
825            ret = split_symbol_offset(arg + 1, &offset);
826            if (ret)
827                break;
828            f->data = alloc_symbol_cache(arg + 1, offset);
829            if (f->data)
830                f->fn = t->fetch[FETCH_MTD_symbol];
831        }
832        break;
833    case '+': /* deref memory */
834        arg++; /* Skip '+', because strict_strtol() rejects it. */
835    case '-':
836        tmp = strchr(arg, '(');
837        if (!tmp)
838            break;
839        *tmp = '\0';
840        ret = strict_strtol(arg, 0, &offset);
841        if (ret)
842            break;
843        arg = tmp + 1;
844        tmp = strrchr(arg, ')');
845        if (tmp) {
846            struct deref_fetch_param *dprm;
847            const struct fetch_type *t2 = find_fetch_type(NULL);
848            *tmp = '\0';
849            dprm = kzalloc(sizeof(struct deref_fetch_param),
850                       GFP_KERNEL);
851            if (!dprm)
852                return -ENOMEM;
853            dprm->offset = offset;
854            ret = __parse_probe_arg(arg, t2, &dprm->orig,
855                        is_return);
856            if (ret)
857                kfree(dprm);
858            else {
859                f->fn = t->fetch[FETCH_MTD_deref];
860                f->data = (void *)dprm;
861            }
862        }
863        break;
864    }
    if (!ret && !f->fn) { /* Parsed, but no fetch method was found */
866        pr_info("%s type has no corresponding fetch method.\n",
867            t->name);
868        ret = -EINVAL;
869    }
870    return ret;
871}
872
873#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
874
875/* Bitfield type needs to be parsed into a fetch function */
876static int __parse_bitfield_probe_arg(const char *bf,
877                      const struct fetch_type *t,
878                      struct fetch_param *f)
879{
880    struct bitfield_fetch_param *bprm;
881    unsigned long bw, bo;
882    char *tail;
883
884    if (*bf != 'b')
885        return 0;
886
887    bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
888    if (!bprm)
889        return -ENOMEM;
890    bprm->orig = *f;
891    f->fn = t->fetch[FETCH_MTD_bitfield];
892    f->data = (void *)bprm;
893
894    bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
895    if (bw == 0 || *tail != '@')
896        return -EINVAL;
897
898    bf = tail + 1;
899    bo = simple_strtoul(bf, &tail, 0);
900    if (tail == bf || *tail != '/')
901        return -EINVAL;
902
903    bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
904    bprm->low_shift = bprm->hi_shift + bo;
905    return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
906}
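/*
 * Worked example (illustrative): "b4@2/8" is a 4-bit field at bit offset 2
 * in an 8-bit container, so t->size is 1 byte, hi_shift = 8 - (4 + 2) = 2
 * and low_shift = 2 + 2 = 4; "buf << 2 >> 4" then keeps exactly bits 2..5
 * of the fetched value.
 */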
907
908/* String length checking wrapper */
909static int parse_probe_arg(char *arg, struct trace_probe *tp,
910               struct probe_arg *parg, int is_return)
911{
912    const char *t;
913    int ret;
914
915    if (strlen(arg) > MAX_ARGSTR_LEN) {
        pr_info("Argument is too long: %s\n", arg);
917        return -ENOSPC;
918    }
919    parg->comm = kstrdup(arg, GFP_KERNEL);
920    if (!parg->comm) {
921        pr_info("Failed to allocate memory for command '%s'.\n", arg);
922        return -ENOMEM;
923    }
924    t = strchr(parg->comm, ':');
925    if (t) {
926        arg[t - parg->comm] = '\0';
927        t++;
928    }
929    parg->type = find_fetch_type(t);
930    if (!parg->type) {
931        pr_info("Unsupported type: %s\n", t);
932        return -EINVAL;
933    }
934    parg->offset = tp->size;
935    tp->size += parg->type->size;
936    ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
937    if (ret >= 0 && t != NULL)
938        ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
939    if (ret >= 0) {
940        parg->fetch_size.fn = get_fetch_size_function(parg->type,
941                                  parg->fetch.fn);
942        parg->fetch_size.data = parg->fetch.data;
943    }
944    return ret;
945}
946
947/* Return 1 if name is reserved or already used by another argument */
948static int conflict_field_name(const char *name,
949                   struct probe_arg *args, int narg)
950{
951    int i;
952    for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
953        if (strcmp(reserved_field_names[i], name) == 0)
954            return 1;
955    for (i = 0; i < narg; i++)
956        if (strcmp(args[i].name, name) == 0)
957            return 1;
958    return 0;
959}
960
961static int create_trace_probe(int argc, char **argv)
962{
963    /*
964     * Argument syntax:
965     * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
966     * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
967     * Fetch args:
968     * $retval : fetch return value
969     * $stack : fetch stack address
970     * $stackN : fetch Nth of stack (N:0-)
971     * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
972     * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
973     * %REG : fetch register REG
974     * Dereferencing memory fetch:
975     * +|-offs(ARG) : fetch memory at ARG +|- offs address.
976     * Alias name of args:
977     * NAME=FETCHARG : set NAME as alias of FETCHARG.
978     * Type of args:
979     * FETCHARG:TYPE : use TYPE instead of unsigned long.
980     */
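    /*
     * Illustrative command lines (symbol and register names are examples
     * only) as written to the kprobe_events file:
     *   p:myprobe do_sys_open dfd=%ax filename=+0(%si):string
     *   r:myretprobe do_sys_open $retval
     *   -:myprobe            (delete the probe again)
     */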
981    struct trace_probe *tp;
982    int i, ret = 0;
983    int is_return = 0, is_delete = 0;
984    char *symbol = NULL, *event = NULL, *group = NULL;
985    char *arg;
986    unsigned long offset = 0;
987    void *addr = NULL;
988    char buf[MAX_EVENT_NAME_LEN];
989
990    /* argc must be >= 1 */
991    if (argv[0][0] == 'p')
992        is_return = 0;
993    else if (argv[0][0] == 'r')
994        is_return = 1;
995    else if (argv[0][0] == '-')
996        is_delete = 1;
997    else {
998        pr_info("Probe definition must be started with 'p', 'r' or"
999            " '-'.\n");
1000        return -EINVAL;
1001    }
1002
1003    if (argv[0][1] == ':') {
1004        event = &argv[0][2];
1005        if (strchr(event, '/')) {
1006            group = event;
1007            event = strchr(group, '/') + 1;
1008            event[-1] = '\0';
1009            if (strlen(group) == 0) {
1010                pr_info("Group name is not specified\n");
1011                return -EINVAL;
1012            }
1013        }
1014        if (strlen(event) == 0) {
1015            pr_info("Event name is not specified\n");
1016            return -EINVAL;
1017        }
1018    }
1019    if (!group)
1020        group = KPROBE_EVENT_SYSTEM;
1021
1022    if (is_delete) {
1023        if (!event) {
1024            pr_info("Delete command needs an event name.\n");
1025            return -EINVAL;
1026        }
1027        mutex_lock(&probe_lock);
1028        tp = find_probe_event(event, group);
1029        if (!tp) {
1030            mutex_unlock(&probe_lock);
1031            pr_info("Event %s/%s doesn't exist.\n", group, event);
1032            return -ENOENT;
1033        }
1034        /* delete an event */
1035        unregister_trace_probe(tp);
1036        free_trace_probe(tp);
1037        mutex_unlock(&probe_lock);
1038        return 0;
1039    }
1040
1041    if (argc < 2) {
1042        pr_info("Probe point is not specified.\n");
1043        return -EINVAL;
1044    }
1045    if (isdigit(argv[1][0])) {
1046        if (is_return) {
1047            pr_info("Return probe point must be a symbol.\n");
1048            return -EINVAL;
1049        }
1050        /* an address specified */
1051        ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
1052        if (ret) {
1053            pr_info("Failed to parse address.\n");
1054            return ret;
1055        }
1056    } else {
1057        /* a symbol specified */
1058        symbol = argv[1];
1059        /* TODO: support .init module functions */
1060        ret = split_symbol_offset(symbol, &offset);
1061        if (ret) {
1062            pr_info("Failed to parse symbol.\n");
1063            return ret;
1064        }
1065        if (offset && is_return) {
1066            pr_info("Return probe must be used without offset.\n");
1067            return -EINVAL;
1068        }
1069    }
1070    argc -= 2; argv += 2;
1071
1072    /* setup a probe */
1073    if (!event) {
1074        /* Make a new event name */
1075        if (symbol)
1076            snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
1077                 is_return ? 'r' : 'p', symbol, offset);
1078        else
1079            snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
1080                 is_return ? 'r' : 'p', addr);
1081        event = buf;
1082    }
1083    tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
1084                   is_return);
1085    if (IS_ERR(tp)) {
1086        pr_info("Failed to allocate trace_probe.(%d)\n",
1087            (int)PTR_ERR(tp));
1088        return PTR_ERR(tp);
1089    }
1090
1091    /* parse arguments */
1092    ret = 0;
1093    for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
1094        /* Increment count for freeing args in error case */
1095        tp->nr_args++;
1096
1097        /* Parse argument name */
1098        arg = strchr(argv[i], '=');
1099        if (arg) {
1100            *arg++ = '\0';
1101            tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
1102        } else {
1103            arg = argv[i];
1104            /* If argument name is omitted, set "argN" */
1105            snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
1106            tp->args[i].name = kstrdup(buf, GFP_KERNEL);
1107        }
1108
1109        if (!tp->args[i].name) {
1110            pr_info("Failed to allocate argument[%d] name.\n", i);
1111            ret = -ENOMEM;
1112            goto error;
1113        }
1114
1115        if (!is_good_name(tp->args[i].name)) {
1116            pr_info("Invalid argument[%d] name: %s\n",
1117                i, tp->args[i].name);
1118            ret = -EINVAL;
1119            goto error;
1120        }
1121
1122        if (conflict_field_name(tp->args[i].name, tp->args, i)) {
1123            pr_info("Argument[%d] name '%s' conflicts with "
1124                "another field.\n", i, argv[i]);
1125            ret = -EINVAL;
1126            goto error;
1127        }
1128
1129        /* Parse fetch argument */
1130        ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
1131        if (ret) {
1132            pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
1133            goto error;
1134        }
1135    }
1136
1137    ret = register_trace_probe(tp);
1138    if (ret)
1139        goto error;
1140    return 0;
1141
1142error:
1143    free_trace_probe(tp);
1144    return ret;
1145}
1146
1147static void cleanup_all_probes(void)
1148{
1149    struct trace_probe *tp;
1150
1151    mutex_lock(&probe_lock);
1152    /* TODO: Use batch unregistration */
1153    while (!list_empty(&probe_list)) {
1154        tp = list_entry(probe_list.next, struct trace_probe, list);
1155        unregister_trace_probe(tp);
1156        free_trace_probe(tp);
1157    }
1158    mutex_unlock(&probe_lock);
1159}
1160
1161
1162/* Probes listing interfaces */
1163static void *probes_seq_start(struct seq_file *m, loff_t *pos)
1164{
1165    mutex_lock(&probe_lock);
1166    return seq_list_start(&probe_list, *pos);
1167}
1168
1169static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
1170{
1171    return seq_list_next(v, &probe_list, pos);
1172}
1173
1174static void probes_seq_stop(struct seq_file *m, void *v)
1175{
1176    mutex_unlock(&probe_lock);
1177}
1178
1179static int probes_seq_show(struct seq_file *m, void *v)
1180{
1181    struct trace_probe *tp = v;
1182    int i;
1183
1184    seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
1185    seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
1186
1187    if (!tp->symbol)
1188        seq_printf(m, " 0x%p", tp->rp.kp.addr);
1189    else if (tp->rp.kp.offset)
1190        seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
1191    else
1192        seq_printf(m, " %s", probe_symbol(tp));
1193
1194    for (i = 0; i < tp->nr_args; i++)
1195        seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
1196    seq_printf(m, "\n");
1197
1198    return 0;
1199}
1200
1201static const struct seq_operations probes_seq_op = {
1202    .start = probes_seq_start,
1203    .next = probes_seq_next,
1204    .stop = probes_seq_stop,
1205    .show = probes_seq_show
1206};
1207
1208static int probes_open(struct inode *inode, struct file *file)
1209{
1210    if ((file->f_mode & FMODE_WRITE) &&
1211        (file->f_flags & O_TRUNC))
1212        cleanup_all_probes();
1213
1214    return seq_open(file, &probes_seq_op);
1215}
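/*
 * Note: opening kprobe_events for writing with O_TRUNC (e.g. a truncating
 * shell redirection) removes every existing probe before new definitions
 * are parsed, per the cleanup_all_probes() call above.
 */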
1216
1217static int command_trace_probe(const char *buf)
1218{
1219    char **argv;
1220    int argc = 0, ret = 0;
1221
1222    argv = argv_split(GFP_KERNEL, buf, &argc);
1223    if (!argv)
1224        return -ENOMEM;
1225
1226    if (argc)
1227        ret = create_trace_probe(argc, argv);
1228
1229    argv_free(argv);
1230    return ret;
1231}
1232
1233#define WRITE_BUFSIZE 4096
1234
1235static ssize_t probes_write(struct file *file, const char __user *buffer,
1236                size_t count, loff_t *ppos)
1237{
1238    char *kbuf, *tmp;
1239    int ret;
1240    size_t done;
1241    size_t size;
1242
1243    kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
1244    if (!kbuf)
1245        return -ENOMEM;
1246
1247    ret = done = 0;
1248    while (done < count) {
1249        size = count - done;
1250        if (size >= WRITE_BUFSIZE)
1251            size = WRITE_BUFSIZE - 1;
1252        if (copy_from_user(kbuf, buffer + done, size)) {
1253            ret = -EFAULT;
1254            goto out;
1255        }
1256        kbuf[size] = '\0';
1257        tmp = strchr(kbuf, '\n');
1258        if (tmp) {
1259            *tmp = '\0';
1260            size = tmp - kbuf + 1;
1261        } else if (done + size < count) {
1262            pr_warning("Line length is too long: "
1263                   "Should be less than %d.", WRITE_BUFSIZE);
1264            ret = -EINVAL;
1265            goto out;
1266        }
1267        done += size;
1268        /* Remove comments */
1269        tmp = strchr(kbuf, '#');
1270        if (tmp)
1271            *tmp = '\0';
1272
1273        ret = command_trace_probe(kbuf);
1274        if (ret)
1275            goto out;
1276    }
1277    ret = done;
1278out:
1279    kfree(kbuf);
1280    return ret;
1281}
1282
1283static const struct file_operations kprobe_events_ops = {
1284    .owner = THIS_MODULE,
1285    .open = probes_open,
1286    .read = seq_read,
1287    .llseek = seq_lseek,
1288    .release = seq_release,
1289    .write = probes_write,
1290};
1291
1292/* Probes profiling interfaces */
1293static int probes_profile_seq_show(struct seq_file *m, void *v)
1294{
1295    struct trace_probe *tp = v;
1296
1297    seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
1298           tp->rp.kp.nmissed);
1299
1300    return 0;
1301}
1302
1303static const struct seq_operations profile_seq_op = {
1304    .start = probes_seq_start,
1305    .next = probes_seq_next,
1306    .stop = probes_seq_stop,
1307    .show = probes_profile_seq_show
1308};
1309
1310static int profile_open(struct inode *inode, struct file *file)
1311{
1312    return seq_open(file, &profile_seq_op);
1313}
1314
1315static const struct file_operations kprobe_profile_ops = {
1316    .owner = THIS_MODULE,
1317    .open = profile_open,
1318    .read = seq_read,
1319    .llseek = seq_lseek,
1320    .release = seq_release,
1321};
1322
/* Sum up the total data length for dynamic arrays (strings) */
1324static __kprobes int __get_data_size(struct trace_probe *tp,
1325                     struct pt_regs *regs)
1326{
1327    int i, ret = 0;
1328    u32 len;
1329
1330    for (i = 0; i < tp->nr_args; i++)
1331        if (unlikely(tp->args[i].fetch_size.fn)) {
1332            call_fetch(&tp->args[i].fetch_size, regs, &len);
1333            ret += len;
1334        }
1335
1336    return ret;
1337}
1338
1339/* Store the value of each argument */
1340static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
1341                       struct pt_regs *regs,
1342                       u8 *data, int maxlen)
1343{
1344    int i;
1345    u32 end = tp->size;
1346    u32 *dl; /* Data (relative) location */
1347
1348    for (i = 0; i < tp->nr_args; i++) {
1349        if (unlikely(tp->args[i].fetch_size.fn)) {
1350            /*
1351             * First, we set the relative location and
1352             * maximum data length to *dl
1353             */
1354            dl = (u32 *)(data + tp->args[i].offset);
1355            *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
1356            /* Then try to fetch string or dynamic array data */
1357            call_fetch(&tp->args[i].fetch, regs, dl);
1358            /* Reduce maximum length */
1359            end += get_rloc_len(*dl);
1360            maxlen -= get_rloc_len(*dl);
1361            /* Trick here, convert data_rloc to data_loc */
1362            *dl = convert_rloc_to_loc(*dl,
1363                 ent_size + tp->args[i].offset);
1364        } else
1365            /* Just fetching data normally */
1366            call_fetch(&tp->args[i].fetch, regs,
1367                   data + tp->args[i].offset);
1368    }
1369}
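/*
 * Resulting record layout (for illustration): the fixed entry header is
 * followed by tp->size bytes of argument values; a string argument keeps a
 * u32 data_loc in its slot pointing into the dynamic data appended after
 * the fixed area.
 */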
1370
1371/* Kprobe handler */
1372static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1373{
1374    struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1375    struct kprobe_trace_entry_head *entry;
1376    struct ring_buffer_event *event;
1377    struct ring_buffer *buffer;
1378    int size, dsize, pc;
1379    unsigned long irq_flags;
1380    struct ftrace_event_call *call = &tp->call;
1381
1382    tp->nhit++;
1383
1384    local_save_flags(irq_flags);
1385    pc = preempt_count();
1386
1387    dsize = __get_data_size(tp, regs);
1388    size = sizeof(*entry) + tp->size + dsize;
1389
1390    event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1391                          size, irq_flags, pc);
1392    if (!event)
1393        return;
1394
1395    entry = ring_buffer_event_data(event);
1396    entry->ip = (unsigned long)kp->addr;
1397    store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1398
1399    if (!filter_current_check_discard(buffer, call, entry, event))
1400        trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1401}
1402
1403/* Kretprobe handler */
1404static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1405                      struct pt_regs *regs)
1406{
1407    struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1408    struct kretprobe_trace_entry_head *entry;
1409    struct ring_buffer_event *event;
1410    struct ring_buffer *buffer;
1411    int size, pc, dsize;
1412    unsigned long irq_flags;
1413    struct ftrace_event_call *call = &tp->call;
1414
1415    local_save_flags(irq_flags);
1416    pc = preempt_count();
1417
1418    dsize = __get_data_size(tp, regs);
1419    size = sizeof(*entry) + tp->size + dsize;
1420
1421    event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1422                          size, irq_flags, pc);
1423    if (!event)
1424        return;
1425
1426    entry = ring_buffer_event_data(event);
1427    entry->func = (unsigned long)tp->rp.kp.addr;
1428    entry->ret_ip = (unsigned long)ri->ret_addr;
1429    store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1430
1431    if (!filter_current_check_discard(buffer, call, entry, event))
1432        trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1433}
1434
1435/* Event entry printers */
1436enum print_line_t
1437print_kprobe_event(struct trace_iterator *iter, int flags,
1438           struct trace_event *event)
1439{
1440    struct kprobe_trace_entry_head *field;
1441    struct trace_seq *s = &iter->seq;
1442    struct trace_probe *tp;
1443    u8 *data;
1444    int i;
1445
1446    field = (struct kprobe_trace_entry_head *)iter->ent;
1447    tp = container_of(event, struct trace_probe, call.event);
1448
1449    if (!trace_seq_printf(s, "%s: (", tp->call.name))
1450        goto partial;
1451
1452    if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1453        goto partial;
1454
1455    if (!trace_seq_puts(s, ")"))
1456        goto partial;
1457
1458    data = (u8 *)&field[1];
1459    for (i = 0; i < tp->nr_args; i++)
1460        if (!tp->args[i].type->print(s, tp->args[i].name,
1461                         data + tp->args[i].offset, field))
1462            goto partial;
1463
1464    if (!trace_seq_puts(s, "\n"))
1465        goto partial;
1466
1467    return TRACE_TYPE_HANDLED;
1468partial:
1469    return TRACE_TYPE_PARTIAL_LINE;
1470}
1471
1472enum print_line_t
1473print_kretprobe_event(struct trace_iterator *iter, int flags,
1474              struct trace_event *event)
1475{
1476    struct kretprobe_trace_entry_head *field;
1477    struct trace_seq *s = &iter->seq;
1478    struct trace_probe *tp;
1479    u8 *data;
1480    int i;
1481
1482    field = (struct kretprobe_trace_entry_head *)iter->ent;
1483    tp = container_of(event, struct trace_probe, call.event);
1484
1485    if (!trace_seq_printf(s, "%s: (", tp->call.name))
1486        goto partial;
1487
1488    if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1489        goto partial;
1490
1491    if (!trace_seq_puts(s, " <- "))
1492        goto partial;
1493
1494    if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1495        goto partial;
1496
1497    if (!trace_seq_puts(s, ")"))
1498        goto partial;
1499
1500    data = (u8 *)&field[1];
1501    for (i = 0; i < tp->nr_args; i++)
1502        if (!tp->args[i].type->print(s, tp->args[i].name,
1503                         data + tp->args[i].offset, field))
1504            goto partial;
1505
1506    if (!trace_seq_puts(s, "\n"))
1507        goto partial;
1508
1509    return TRACE_TYPE_HANDLED;
1510partial:
1511    return TRACE_TYPE_PARTIAL_LINE;
1512}
1513
1514static int probe_event_enable(struct ftrace_event_call *call)
1515{
1516    struct trace_probe *tp = (struct trace_probe *)call->data;
1517
1518    tp->flags |= TP_FLAG_TRACE;
1519    if (probe_is_return(tp))
1520        return enable_kretprobe(&tp->rp);
1521    else
1522        return enable_kprobe(&tp->rp.kp);
1523}
1524
1525static void probe_event_disable(struct ftrace_event_call *call)
1526{
1527    struct trace_probe *tp = (struct trace_probe *)call->data;
1528
1529    tp->flags &= ~TP_FLAG_TRACE;
1530    if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1531        if (probe_is_return(tp))
1532            disable_kretprobe(&tp->rp);
1533        else
1534            disable_kprobe(&tp->rp.kp);
1535    }
1536}
1537
1538#undef DEFINE_FIELD
1539#define DEFINE_FIELD(type, item, name, is_signed) \
1540    do { \
1541        ret = trace_define_field(event_call, #type, name, \
1542                     offsetof(typeof(field), item), \
1543                     sizeof(field.item), is_signed, \
1544                     FILTER_OTHER); \
1545        if (ret) \
1546            return ret; \
1547    } while (0)
1548
1549static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1550{
1551    int ret, i;
1552    struct kprobe_trace_entry_head field;
1553    struct trace_probe *tp = (struct trace_probe *)event_call->data;
1554
1555    DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1556    /* Set argument names as fields */
1557    for (i = 0; i < tp->nr_args; i++) {
1558        ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1559                     tp->args[i].name,
1560                     sizeof(field) + tp->args[i].offset,
1561                     tp->args[i].type->size,
1562                     tp->args[i].type->is_signed,
1563                     FILTER_OTHER);
1564        if (ret)
1565            return ret;
1566    }
1567    return 0;
1568}
1569
1570static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1571{
1572    int ret, i;
1573    struct kretprobe_trace_entry_head field;
1574    struct trace_probe *tp = (struct trace_probe *)event_call->data;
1575
1576    DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1577    DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1578    /* Set argument names as fields */
1579    for (i = 0; i < tp->nr_args; i++) {
1580        ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1581                     tp->args[i].name,
1582                     sizeof(field) + tp->args[i].offset,
1583                     tp->args[i].type->size,
1584                     tp->args[i].type->is_signed,
1585                     FILTER_OTHER);
1586        if (ret)
1587            return ret;
1588    }
1589    return 0;
1590}
1591
1592static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1593{
1594    int i;
1595    int pos = 0;
1596
1597    const char *fmt, *arg;
1598
1599    if (!probe_is_return(tp)) {
1600        fmt = "(%lx)";
1601        arg = "REC->" FIELD_STRING_IP;
1602    } else {
1603        fmt = "(%lx <- %lx)";
1604        arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1605    }
1606
1607    /* When len=0, we just calculate the needed length */
1608#define LEN_OR_ZERO (len ? len - pos : 0)
1609
1610    pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1611
1612    for (i = 0; i < tp->nr_args; i++) {
1613        pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
1614                tp->args[i].name, tp->args[i].type->fmt);
1615    }
1616
1617    pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1618
1619    for (i = 0; i < tp->nr_args; i++) {
1620        if (strcmp(tp->args[i].type->name, "string") == 0)
1621            pos += snprintf(buf + pos, LEN_OR_ZERO,
1622                    ", __get_str(%s)",
1623                    tp->args[i].name);
1624        else
1625            pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1626                    tp->args[i].name);
1627    }
1628
1629#undef LEN_OR_ZERO
1630
1631    /* return the length of print_fmt */
1632    return pos;
1633}
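/*
 * Illustrative result: for a probe with one default-typed argument named
 * "foo" (a made-up name), __set_print_fmt() builds roughly
 *   "(%lx) foo=%llx", REC->__probe_ip, REC->foo
 * on a 64-bit kernel, where the default fetch type is u64.
 */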
1634
1635static int set_print_fmt(struct trace_probe *tp)
1636{
1637    int len;
1638    char *print_fmt;
1639
1640    /* First: called with 0 length to calculate the needed length */
1641    len = __set_print_fmt(tp, NULL, 0);
1642    print_fmt = kmalloc(len + 1, GFP_KERNEL);
1643    if (!print_fmt)
1644        return -ENOMEM;
1645
1646    /* Second: actually write the @print_fmt */
1647    __set_print_fmt(tp, print_fmt, len + 1);
1648    tp->call.print_fmt = print_fmt;
1649
1650    return 0;
1651}
1652
1653#ifdef CONFIG_PERF_EVENTS
1654
1655/* Kprobe profile handler */
1656static __kprobes void kprobe_perf_func(struct kprobe *kp,
1657                     struct pt_regs *regs)
1658{
1659    struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1660    struct ftrace_event_call *call = &tp->call;
1661    struct kprobe_trace_entry_head *entry;
1662    struct hlist_head *head;
1663    int size, __size, dsize;
1664    int rctx;
1665
1666    dsize = __get_data_size(tp, regs);
1667    __size = sizeof(*entry) + tp->size + dsize;
1668    size = ALIGN(__size + sizeof(u32), sizeof(u64));
1669    size -= sizeof(u32);
1670    if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1671             "profile buffer not large enough"))
1672        return;
1673
1674    entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1675    if (!entry)
1676        return;
1677
1678    entry->ip = (unsigned long)kp->addr;
1679    memset(&entry[1], 0, dsize);
1680    store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1681
1682    head = this_cpu_ptr(call->perf_events);
1683    perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1684}
1685
1686/* Kretprobe profile handler */
1687static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1688                        struct pt_regs *regs)
1689{
1690    struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1691    struct ftrace_event_call *call = &tp->call;
1692    struct kretprobe_trace_entry_head *entry;
1693    struct hlist_head *head;
1694    int size, __size, dsize;
1695    int rctx;
1696
1697    dsize = __get_data_size(tp, regs);
1698    __size = sizeof(*entry) + tp->size + dsize;
1699    size = ALIGN(__size + sizeof(u32), sizeof(u64));
1700    size -= sizeof(u32);
1701    if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1702             "profile buffer not large enough"))
1703        return;
1704
1705    entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1706    if (!entry)
1707        return;
1708
1709    entry->func = (unsigned long)tp->rp.kp.addr;
1710    entry->ret_ip = (unsigned long)ri->ret_addr;
1711    store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1712
1713    head = this_cpu_ptr(call->perf_events);
1714    perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1715}
1716
1717static int probe_perf_enable(struct ftrace_event_call *call)
1718{
1719    struct trace_probe *tp = (struct trace_probe *)call->data;
1720
1721    tp->flags |= TP_FLAG_PROFILE;
1722
1723    if (probe_is_return(tp))
1724        return enable_kretprobe(&tp->rp);
1725    else
1726        return enable_kprobe(&tp->rp.kp);
1727}
1728
1729static void probe_perf_disable(struct ftrace_event_call *call)
1730{
1731    struct trace_probe *tp = (struct trace_probe *)call->data;
1732
1733    tp->flags &= ~TP_FLAG_PROFILE;
1734
1735    if (!(tp->flags & TP_FLAG_TRACE)) {
1736        if (probe_is_return(tp))
1737            disable_kretprobe(&tp->rp);
1738        else
1739            disable_kprobe(&tp->rp.kp);
1740    }
1741}
1742#endif /* CONFIG_PERF_EVENTS */
1743
1744static __kprobes
1745int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1746{
1747    switch (type) {
1748    case TRACE_REG_REGISTER:
1749        return probe_event_enable(event);
1750    case TRACE_REG_UNREGISTER:
1751        probe_event_disable(event);
1752        return 0;
1753
1754#ifdef CONFIG_PERF_EVENTS
1755    case TRACE_REG_PERF_REGISTER:
1756        return probe_perf_enable(event);
1757    case TRACE_REG_PERF_UNREGISTER:
1758        probe_perf_disable(event);
1759        return 0;
1760#endif
1761    }
1762    return 0;
1763}
1764
1765static __kprobes
1766int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1767{
1768    struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1769
1770    if (tp->flags & TP_FLAG_TRACE)
1771        kprobe_trace_func(kp, regs);
1772#ifdef CONFIG_PERF_EVENTS
1773    if (tp->flags & TP_FLAG_PROFILE)
1774        kprobe_perf_func(kp, regs);
1775#endif
    return 0; /* We don't tweak the kernel, so just return 0 */
1777}
1778
1779static __kprobes
1780int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1781{
1782    struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1783
1784    if (tp->flags & TP_FLAG_TRACE)
1785        kretprobe_trace_func(ri, regs);
1786#ifdef CONFIG_PERF_EVENTS
1787    if (tp->flags & TP_FLAG_PROFILE)
1788        kretprobe_perf_func(ri, regs);
1789#endif
    return 0; /* We don't tweak the kernel, so just return 0 */
1791}
1792
1793static struct trace_event_functions kretprobe_funcs = {
1794    .trace = print_kretprobe_event
1795};
1796
1797static struct trace_event_functions kprobe_funcs = {
1798    .trace = print_kprobe_event
1799};
1800
1801static int register_probe_event(struct trace_probe *tp)
1802{
1803    struct ftrace_event_call *call = &tp->call;
1804    int ret;
1805
1806    /* Initialize ftrace_event_call */
1807    INIT_LIST_HEAD(&call->class->fields);
1808    if (probe_is_return(tp)) {
1809        call->event.funcs = &kretprobe_funcs;
1810        call->class->define_fields = kretprobe_event_define_fields;
1811    } else {
1812        call->event.funcs = &kprobe_funcs;
1813        call->class->define_fields = kprobe_event_define_fields;
1814    }
1815    if (set_print_fmt(tp) < 0)
1816        return -ENOMEM;
1817    ret = register_ftrace_event(&call->event);
1818    if (!ret) {
1819        kfree(call->print_fmt);
1820        return -ENODEV;
1821    }
1822    call->flags = 0;
1823    call->class->reg = kprobe_register;
1824    call->data = tp;
1825    ret = trace_add_event_call(call);
1826    if (ret) {
1827        pr_info("Failed to register kprobe event: %s\n", call->name);
1828        kfree(call->print_fmt);
1829        unregister_ftrace_event(&call->event);
1830    }
1831    return ret;
1832}
1833
1834static void unregister_probe_event(struct trace_probe *tp)
1835{
1836    /* tp->event is unregistered in trace_remove_event_call() */
1837    trace_remove_event_call(&tp->call);
1838    kfree(tp->call.print_fmt);
1839}
1840
1841/* Make a debugfs interface for controlling probe points */
1842static __init int init_kprobe_trace(void)
1843{
1844    struct dentry *d_tracer;
1845    struct dentry *entry;
1846
1847    d_tracer = tracing_init_dentry();
1848    if (!d_tracer)
1849        return 0;
1850
1851    entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1852                    NULL, &kprobe_events_ops);
1853
1854    /* Event list interface */
1855    if (!entry)
1856        pr_warning("Could not create debugfs "
1857               "'kprobe_events' entry\n");
1858
1859    /* Profile interface */
1860    entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1861                    NULL, &kprobe_profile_ops);
1862
1863    if (!entry)
1864        pr_warning("Could not create debugfs "
1865               "'kprobe_profile' entry\n");
1866    return 0;
1867}
1868fs_initcall(init_kprobe_trace);
1869
1870
1871#ifdef CONFIG_FTRACE_STARTUP_TEST
1872
1873/*
1874 * The "__used" keeps gcc from removing the function symbol
1875 * from the kallsyms table.
1876 */
1877static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
1878                           int a4, int a5, int a6)
1879{
1880    return a1 + a2 + a3 + a4 + a5 + a6;
1881}
1882
1883static __init int kprobe_trace_self_tests_init(void)
1884{
1885    int ret, warn = 0;
1886    int (*target)(int, int, int, int, int, int);
1887    struct trace_probe *tp;
1888
1889    target = kprobe_trace_selftest_target;
1890
1891    pr_info("Testing kprobe tracing: ");
1892
1893    ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1894                  "$stack $stack0 +0($stack)");
1895    if (WARN_ON_ONCE(ret)) {
1896        pr_warning("error on probing function entry.\n");
1897        warn++;
1898    } else {
1899        /* Enable trace point */
1900        tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1901        if (WARN_ON_ONCE(tp == NULL)) {
1902            pr_warning("error on getting new probe.\n");
1903            warn++;
1904        } else
1905            probe_event_enable(&tp->call);
1906    }
1907
1908    ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1909                  "$retval");
1910    if (WARN_ON_ONCE(ret)) {
1911        pr_warning("error on probing function return.\n");
1912        warn++;
1913    } else {
1914        /* Enable trace point */
1915        tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1916        if (WARN_ON_ONCE(tp == NULL)) {
1917            pr_warning("error on getting new probe.\n");
1918            warn++;
1919        } else
1920            probe_event_enable(&tp->call);
1921    }
1922
1923    if (warn)
1924        goto end;
1925
1926    ret = target(1, 2, 3, 4, 5, 6);
1927
1928    ret = command_trace_probe("-:testprobe");
1929    if (WARN_ON_ONCE(ret)) {
1930        pr_warning("error on deleting a probe.\n");
1931        warn++;
1932    }
1933
1934    ret = command_trace_probe("-:testprobe2");
1935    if (WARN_ON_ONCE(ret)) {
1936        pr_warning("error on deleting a probe.\n");
1937        warn++;
1938    }
1939
1940end:
1941    cleanup_all_probes();
1942    if (warn)
        pr_cont("NG: Some tests failed. Please check them.\n");
1944    else
1945        pr_cont("OK\n");
1946    return 0;
1947}
1948
1949late_initcall(kprobe_trace_self_tests_init);
1950
1951#endif
1952
