kernel/trace/trace_event_perf.c

/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

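/*
 * Scratch buffers the raw sample payload is built in before it is
 * handed to perf: one per-cpu buffer per recursion context (task,
 * softirq, hardirq, NMI), shared by all trace events.  They are
 * allocated by the first perf_trace_event_init() call and freed again
 * once total_ref_count drops back to zero.
 */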
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises.
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int total_ref_count;

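/*
 * Raw samples (PERF_SAMPLE_RAW) carry the full tracepoint record, so
 * they are limited to CAP_SYS_ADMIN (subject to the tracepoint paranoia
 * sysctl) unless the event is attached to a task and the event class
 * opted in via TRACE_EVENT_FL_CAP_ANY.  Plain counting never exposes
 * the payload and is always allowed.
 */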
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	/* No tracing, just counting, so no obvious leak */
	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
		return 0;

	/* Some events are ok to be traced by non-root users... */
	if (p_event->attach_state == PERF_ATTACH_TASK) {
		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
			return 0;
	}

	/*
	 * ...otherwise raw tracepoint data can be a severe data leak,
	 * only allow root to have these.
	 */
	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

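/*
 * First perf user of an event id allocates the per-cpu hlist that the
 * event's perf instances hang off, grabs the shared scratch buffers if
 * this is the very first perf trace event system-wide, and registers
 * the class via TRACE_REG_PERF_REGISTER.  Subsequent users only bump
 * perf_refcount.
 */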
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	struct hlist_head __percpu *list;
	int ret;
	int cpu;

	ret = perf_trace_event_perm(tp_event, p_event);
	if (ret)
		return ret;

	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;

	ret = -ENOMEM;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	if (!total_ref_count) {
		char __percpu *buf;
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			buf = (char __percpu *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail;

			perf_trace_buf[i] = buf;
		}
	}

	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;

fail:
	if (!total_ref_count) {
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}

	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}

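/*
 * Entry point from the perf core: attr.config carries the tracepoint
 * event id (userspace usually reads it from the event's "id" file under
 * the tracing debugfs directory), and it is matched against the
 * registered ftrace_events under event_mutex.  A minimal sketch of the
 * attributes such a caller sets up:
 *
 *	struct perf_event_attr attr = {
 *		.type        = PERF_TYPE_TRACEPOINT,
 *		.config      = id,
 *		.sample_type = PERF_SAMPLE_RAW,
 *	};
 */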
int perf_trace_init(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event;
	int event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    try_module_get(tp_event->mod)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				module_put(tp_event->mod);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}

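/*
 * pmu ->add() callback: queue the event on this cpu's hlist so the
 * tracepoint handler can find it when the event fires.  Without
 * PERF_EF_START the event is added in the PERF_HES_STOPPED state.
 */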
int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	struct hlist_head __percpu *pcpu_list;
	struct hlist_head *list;

	pcpu_list = tp_event->perf_events;
	if (WARN_ON_ONCE(!pcpu_list))
		return -EINVAL;

	if (!(flags & PERF_EF_START))
		p_event->hw.state = PERF_HES_STOPPED;

	list = this_cpu_ptr(pcpu_list);
	hlist_add_head_rcu(&p_event->hlist_entry, list);

	return 0;
}

void perf_trace_del(struct perf_event *p_event, int flags)
{
	hlist_del_rcu(&p_event->hlist_entry);
}

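/*
 * Counterpart of perf_trace_init(): drop the per-event-id reference,
 * unregister the class once the last perf user of this event is gone,
 * and release the shared scratch buffers once no trace event uses perf
 * at all anymore.
 */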
void perf_trace_destroy(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int i;

	mutex_lock(&event_mutex);
	if (--tp_event->perf_refcount > 0)
		goto out;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * will be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
out:
	module_put(tp_event->mod);
	mutex_unlock(&event_mutex);
}

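/*
 * Reserve one of the shared per-context scratch buffers and pre-fill
 * the common trace_entry header; returns NULL if no recursion context
 * is available.  A rough sketch of how callers (e.g. the handlers
 * generated from include/trace/ftrace.h) typically pair this with
 * perf_trace_buf_submit(); the snippet is illustrative, not copied
 * from a real caller:
 *
 *	entry = perf_trace_buf_prepare(size, event_call->event.type,
 *				       regs, &rctx);
 *	if (!entry)
 *		return;
 *	... fill in the event-specific fields behind the header ...
 *	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head);
 *
 * perf_trace_buf_submit() is expected to hand the record to perf and
 * drop the recursion context taken here.
 */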
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       struct pt_regs *regs, int *rctxp)
{
	struct trace_entry *entry;
	unsigned long flags;
	char *raw_data;
	int pc;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	pc = preempt_count();

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		return NULL;

	raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

	/* zero the dead bytes from the alignment padding so we don't leak stack data to userspace */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

	entry = (struct trace_entry *)raw_data;
	local_save_flags(flags);
	tracing_generic_entry_update(entry, flags, pc);
	entry->type = type;

	return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);