Root/tools/perf/bench/mem-memcpy.c

1/*
2 * mem-memcpy.c
3 *
4 * memcpy: Simple memory copy in various ways
5 *
6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
7 */
8
9#include "../perf.h"
10#include "../util/util.h"
11#include "../util/parse-options.h"
12#include "../util/header.h"
13#include "bench.h"
14#include "mem-memcpy-arch.h"
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/time.h>
20#include <errno.h>
21
/* Step between adjacent size units (B, KB, MB, GB) used when printing. */
#define K 1024

/*
 * Benchmark settings.  The values given here are the defaults; the
 * options[] table lets the command line override them.
 */
static const char *length_str = "1MB";      /* --length */
static const char *routine = "default";     /* --routine */
static int iterations = 1;                  /* --iterations */

static bool use_clock;                      /* --clock */
static bool only_prefault;                  /* --only-prefault */
static bool no_prefault;                    /* --no-prefault */

/* Descriptor set up by init_clock() and read by get_clock(). */
static int clock_fd;
31
32static const struct option options[] = {
33    OPT_STRING('l', "length", &length_str, "1MB",
34            "Specify length of memory to copy. "
35            "available unit: B, MB, GB (upper and lower)"),
36    OPT_STRING('r', "routine", &routine, "default",
37            "Specify routine to copy"),
38    OPT_INTEGER('i', "iterations", &iterations,
39            "repeat memcpy() invocation this number of times"),
40    OPT_BOOLEAN('c', "clock", &use_clock,
41            "Use CPU clock for measuring"),
42    OPT_BOOLEAN('o', "only-prefault", &only_prefault,
43            "Show only the result with page faults before memcpy()"),
44    OPT_BOOLEAN('n', "no-prefault", &no_prefault,
45            "Show only the result without page faults before memcpy()"),
46    OPT_END()
47};
48
/* Signature shared by every benchmarked routine; same as memcpy(). */
typedef void *(*memcpy_t)(void *, const void *, size_t);

/*
 * One selectable copy routine.  The member order matters: the
 * routines[] table initialises its entries positionally.
 */
struct routine {
    const char *name;   /* compared against the --routine argument */
    const char *desc;   /* shown when the available routines are listed */
    memcpy_t fn;        /* function that is benchmarked */
};
56
57struct routine routines[] = {
58    { "default",
59      "Default memcpy() provided by glibc",
60      memcpy },
61#ifdef ARCH_X86_64
62
63#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
64#include "mem-memcpy-x86-64-asm-def.h"
65#undef MEMCPY_FN
66
67#endif
68
69    { NULL,
70      NULL,
71      NULL }
72};
73
/* NULL-terminated usage lines handed to parse_options(). */
static const char * const bench_mem_memcpy_usage[] = {
    "perf bench mem memcpy <options>",
    NULL,
};
78
79static struct perf_event_attr clock_attr = {
80    .type = PERF_TYPE_HARDWARE,
81    .config = PERF_COUNT_HW_CPU_CYCLES
82};
83
84static void init_clock(void)
85{
86    clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
87
88    if (clock_fd < 0 && errno == ENOSYS)
89        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
90    else
91        BUG_ON(clock_fd < 0);
92}
93
94static u64 get_clock(void)
95{
96    int ret;
97    u64 clk;
98
99    ret = read(clock_fd, &clk, sizeof(u64));
100    BUG_ON(ret != sizeof(u64));
101
102    return clk;
103}
104
/* Convert a struct timeval to seconds, keeping the microsecond part. */
static double timeval2double(struct timeval *ts)
{
    const double usec_per_sec = (double)1000000;
    double whole = (double)ts->tv_sec;
    double fraction = (double)ts->tv_usec / usec_per_sec;

    return whole + fraction;
}
110
/*
 * Allocate the destination and source buffers for one benchmark run.
 *
 * @dst:    receives the destination buffer
 * @src:    receives the source buffer
 * @length: size of each buffer in bytes
 *
 * Both buffers come from zalloc() and must be released with free() by
 * the caller.  If either allocation fails the program is terminated
 * through die().
 */
static void alloc_mem(void **dst, void **src, size_t length)
{
    /* test the allocation itself; the out-parameter address is never NULL */
    *dst = zalloc(length);
    if (!*dst)
        die("memory allocation failed - maybe length is too large?\n");

    *src = zalloc(length);
    if (!*src)
        die("memory allocation failed - maybe length is too large?\n");
}
121
122static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
123{
124    u64 clock_start = 0ULL, clock_end = 0ULL;
125    void *src = NULL, *dst = NULL;
126    int i;
127
128    alloc_mem(&src, &dst, len);
129
130    if (prefault)
131        fn(dst, src, len);
132
133    clock_start = get_clock();
134    for (i = 0; i < iterations; ++i)
135        fn(dst, src, len);
136    clock_end = get_clock();
137
138    free(src);
139    free(dst);
140    return clock_end - clock_start;
141}
142
143static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
144{
145    struct timeval tv_start, tv_end, tv_diff;
146    void *src = NULL, *dst = NULL;
147    int i;
148
149    alloc_mem(&src, &dst, len);
150
151    if (prefault)
152        fn(dst, src, len);
153
154    BUG_ON(gettimeofday(&tv_start, NULL));
155    for (i = 0; i < iterations; ++i)
156        fn(dst, src, len);
157    BUG_ON(gettimeofday(&tv_end, NULL));
158
159    timersub(&tv_end, &tv_start, &tv_diff);
160
161    free(src);
162    free(dst);
163    return (double)((double)len / timeval2double(&tv_diff));
164}
165
/*
 * Index into the two-element result arrays when only one measurement
 * is taken: 0 for --no-prefault, 1 otherwise.
 */
#define pf (!no_prefault)
167
/*
 * Print a throughput given in bytes per second, scaled to B, KB, MB or
 * GB per second in steps of K.  No newline is printed.
 *
 * The argument is evaluated exactly once.
 */
#define print_bps(x) do { \
        const double bps_ = (x); \
        if (bps_ < K) \
            printf(" %14lf B/Sec", bps_); \
        else if (bps_ < K * K) \
            printf(" %14lf KB/Sec", bps_ / K); \
        else if (bps_ < K * K * K) \
            printf(" %14lf MB/Sec", bps_ / K / K); \
        else \
            printf(" %14lf GB/Sec", bps_ / K / K / K); \
    } while (0)
178
179int bench_mem_memcpy(int argc, const char **argv,
180             const char *prefix __used)
181{
182    int i;
183    size_t len;
184    double result_bps[2];
185    u64 result_clock[2];
186
187    argc = parse_options(argc, argv, options,
188                 bench_mem_memcpy_usage, 0);
189
190    if (use_clock)
191        init_clock();
192
193    len = (size_t)perf_atoll((char *)length_str);
194
195    result_clock[0] = result_clock[1] = 0ULL;
196    result_bps[0] = result_bps[1] = 0.0;
197
198    if ((s64)len <= 0) {
199        fprintf(stderr, "Invalid length:%s\n", length_str);
200        return 1;
201    }
202
203    /* same to without specifying either of prefault and no-prefault */
204    if (only_prefault && no_prefault)
205        only_prefault = no_prefault = false;
206
207    for (i = 0; routines[i].name; i++) {
208        if (!strcmp(routines[i].name, routine))
209            break;
210    }
211    if (!routines[i].name) {
212        printf("Unknown routine:%s\n", routine);
213        printf("Available routines...\n");
214        for (i = 0; routines[i].name; i++) {
215            printf("\t%s ... %s\n",
216                   routines[i].name, routines[i].desc);
217        }
218        return 1;
219    }
220
221    if (bench_format == BENCH_FORMAT_DEFAULT)
222        printf("# Copying %s Bytes ...\n\n", length_str);
223
224    if (!only_prefault && !no_prefault) {
225        /* show both of results */
226        if (use_clock) {
227            result_clock[0] =
228                do_memcpy_clock(routines[i].fn, len, false);
229            result_clock[1] =
230                do_memcpy_clock(routines[i].fn, len, true);
231        } else {
232            result_bps[0] =
233                do_memcpy_gettimeofday(routines[i].fn,
234                        len, false);
235            result_bps[1] =
236                do_memcpy_gettimeofday(routines[i].fn,
237                        len, true);
238        }
239    } else {
240        if (use_clock) {
241            result_clock[pf] =
242                do_memcpy_clock(routines[i].fn,
243                        len, only_prefault);
244        } else {
245            result_bps[pf] =
246                do_memcpy_gettimeofday(routines[i].fn,
247                        len, only_prefault);
248        }
249    }
250
251    switch (bench_format) {
252    case BENCH_FORMAT_DEFAULT:
253        if (!only_prefault && !no_prefault) {
254            if (use_clock) {
255                printf(" %14lf Clock/Byte\n",
256                    (double)result_clock[0]
257                    / (double)len);
258                printf(" %14lf Clock/Byte (with prefault)\n",
259                    (double)result_clock[1]
260                    / (double)len);
261            } else {
262                print_bps(result_bps[0]);
263                printf("\n");
264                print_bps(result_bps[1]);
265                printf(" (with prefault)\n");
266            }
267        } else {
268            if (use_clock) {
269                printf(" %14lf Clock/Byte",
270                    (double)result_clock[pf]
271                    / (double)len);
272            } else
273                print_bps(result_bps[pf]);
274
275            printf("%s\n", only_prefault ? " (with prefault)" : "");
276        }
277        break;
278    case BENCH_FORMAT_SIMPLE:
279        if (!only_prefault && !no_prefault) {
280            if (use_clock) {
281                printf("%lf %lf\n",
282                    (double)result_clock[0] / (double)len,
283                    (double)result_clock[1] / (double)len);
284            } else {
285                printf("%lf %lf\n",
286                    result_bps[0], result_bps[1]);
287            }
288        } else {
289            if (use_clock) {
290                printf("%lf\n", (double)result_clock[pf]
291                    / (double)len);
292            } else
293                printf("%lf\n", result_bps[pf]);
294        }
295        break;
296    default:
297        /* reaching this means there's some disaster: */
298        die("unknown format: %d\n", bench_format);
299        break;
300    }
301
302    return 0;
303}
304

Archive Download this file



interactive