Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/tools/perf/bench/mem-memcpy.c
10821 views
1
/*
2
* mem-memcpy.c
3
*
4
* memcpy: Simple memory copy in various ways
5
*
6
* Written by Hitoshi Mitake <[email protected]>
7
*/
8
#include <ctype.h>
9
10
#include "../perf.h"
11
#include "../util/util.h"
12
#include "../util/parse-options.h"
13
#include "../util/header.h"
14
#include "bench.h"
15
#include "mem-memcpy-arch.h"
16
17
#include <stdio.h>
18
#include <stdlib.h>
19
#include <string.h>
20
#include <sys/time.h>
21
#include <errno.h>
22
23
/* 1024: unit step print_bps() uses to scale between B, KB, MB and GB. */
#define K 1024
24
25
/* Settings written by parse_options() through the options[] table. */
static const char	*length_str	= "1MB";	/* -l, --length */
static const char	*routine	= "default";	/* -r, --routine */
static bool		use_clock;			/* -c, --clock */
static bool		only_prefault;			/* -o, --only-prefault */
static bool		no_prefault;			/* -n, --no-prefault */

/* Descriptor returned by sys_perf_event_open(); set in init_clock(). */
static int		clock_fd;
31
32
static const struct option options[] = {
33
OPT_STRING('l', "length", &length_str, "1MB",
34
"Specify length of memory to copy. "
35
"available unit: B, MB, GB (upper and lower)"),
36
OPT_STRING('r', "routine", &routine, "default",
37
"Specify routine to copy"),
38
OPT_BOOLEAN('c', "clock", &use_clock,
39
"Use CPU clock for measuring"),
40
OPT_BOOLEAN('o', "only-prefault", &only_prefault,
41
"Show only the result with page faults before memcpy()"),
42
OPT_BOOLEAN('n', "no-prefault", &no_prefault,
43
"Show only the result without page faults before memcpy()"),
44
OPT_END()
45
};
46
47
/* Pointer to a copy routine with the same prototype as memcpy(). */
typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);
48
49
struct routine {
50
const char *name;
51
const char *desc;
52
memcpy_t fn;
53
};
54
55
/*
 * Table of copy routines selectable with --routine.
 *
 * The first entry is the C library memcpy(); on ARCH_X86_64 builds further
 * entries are generated from mem-memcpy-x86-64-asm-def.h through MEMCPY_FN.
 * The table ends with an entry whose name is NULL.
 */
struct routine routines[] = {
	{
		.name	= "default",
		.desc	= "Default memcpy() provided by glibc",
		.fn	= memcpy,
	},
#ifdef ARCH_X86_64

#define MEMCPY_FN(func, label, text) \
	{ .name = label, .desc = text, .fn = func },
#include "mem-memcpy-x86-64-asm-def.h"
#undef MEMCPY_FN

#endif

	/* terminator */
	{ .name = NULL, .desc = NULL, .fn = NULL }
};
71
72
/* NULL-terminated usage lines passed to parse_options(). */
static const char * const bench_mem_memcpy_usage[] = {
	"perf bench mem memcpy <options>",
	NULL,
};
76
77
static struct perf_event_attr clock_attr = {
78
.type = PERF_TYPE_HARDWARE,
79
.config = PERF_COUNT_HW_CPU_CYCLES
80
};
81
82
static void init_clock(void)
83
{
84
clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
85
86
if (clock_fd < 0 && errno == ENOSYS)
87
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
88
else
89
BUG_ON(clock_fd < 0);
90
}
91
92
static u64 get_clock(void)
93
{
94
int ret;
95
u64 clk;
96
97
ret = read(clock_fd, &clk, sizeof(u64));
98
BUG_ON(ret != sizeof(u64));
99
100
return clk;
101
}
102
103
/* Convert a struct timeval into seconds expressed as a double. */
static double timeval2double(struct timeval *ts)
{
	const double whole_secs = (double)ts->tv_sec;
	const double micro_secs = (double)ts->tv_usec;

	return whole_secs + micro_secs / 1000000.0;
}
108
109
/*
 * Allocate the two buffers used by one benchmark run.
 *
 * @dst:    receives the destination buffer obtained from zalloc()
 * @src:    receives the source buffer obtained from zalloc()
 * @length: size of each buffer in bytes
 *
 * Calls die() when either allocation fails.  The checks look at the
 * allocation results (*dst, *src); the out-parameters themselves are
 * addresses of the caller's variables and would never be NULL.
 */
static void alloc_mem(void **dst, void **src, size_t length)
{
	*dst = zalloc(length);
	if (!*dst)
		die("memory allocation failed - maybe length is too large?\n");

	*src = zalloc(length);
	if (!*src)
		die("memory allocation failed - maybe length is too large?\n");
}
119
120
/*
 * Measure one copy of @len bytes with the counter read by get_clock().
 *
 * @fn:       copy routine under test
 * @len:      number of bytes to copy
 * @prefault: when true, run one untimed copy first so the timed copy
 *            works on buffers that have already been touched
 *
 * Returns the difference between the get_clock() readings taken right
 * after and right before the timed call of @fn.
 */
static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
{
	u64 clock_start = 0ULL, clock_end = 0ULL;
	void *src = NULL, *dst = NULL;

	/* argument order follows alloc_mem(dst, src, length) */
	alloc_mem(&dst, &src, len);

	if (prefault)
		fn(dst, src, len);

	clock_start = get_clock();
	fn(dst, src, len);
	clock_end = get_clock();

	free(src);
	free(dst);
	return clock_end - clock_start;
}
138
139
/*
 * Measure one copy of @len bytes with gettimeofday().
 *
 * @fn:       copy routine under test
 * @len:      number of bytes to copy
 * @prefault: when true, run one untimed copy first so the timed copy
 *            works on buffers that have already been touched
 *
 * Returns @len divided by the elapsed wall-clock time in seconds, i.e.
 * the throughput in bytes per second.
 */
static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
{
	struct timeval tv_start, tv_end, tv_diff;
	void *src = NULL, *dst = NULL;

	/* argument order follows alloc_mem(dst, src, length) */
	alloc_mem(&dst, &src, len);

	if (prefault)
		fn(dst, src, len);

	BUG_ON(gettimeofday(&tv_start, NULL));
	fn(dst, src, len);
	BUG_ON(gettimeofday(&tv_end, NULL));

	timersub(&tv_end, &tv_start, &tv_diff);

	free(src);
	free(dst);
	return (double)((double)len / timeval2double(&tv_diff));
}
159
160
/* Result slot for a single-measurement run: 0 with --no-prefault, else 1. */
#define pf (!no_prefault)
161
162
/*
 * Print a throughput value given in bytes per second, scaled by powers of
 * K to B/Sec, KB/Sec, MB/Sec or GB/Sec.  No newline is printed.
 *
 * The argument is evaluated exactly once and stored in a double, so it
 * may be any expression convertible to double.
 */
#define print_bps(x) do {						\
		double bps_ = (x);					\
									\
		if (bps_ < K)						\
			printf(" %14lf B/Sec", bps_);			\
		else if (bps_ < K * K)					\
			printf(" %14lf KB/Sec", bps_ / K);		\
		else if (bps_ < K * K * K)				\
			printf(" %14lf MB/Sec", bps_ / K / K);		\
		else							\
			printf(" %14lf GB/Sec", bps_ / K / K / K);	\
	} while (0)
172
173
int bench_mem_memcpy(int argc, const char **argv,
174
const char *prefix __used)
175
{
176
int i;
177
size_t len;
178
double result_bps[2];
179
u64 result_clock[2];
180
181
argc = parse_options(argc, argv, options,
182
bench_mem_memcpy_usage, 0);
183
184
if (use_clock)
185
init_clock();
186
187
len = (size_t)perf_atoll((char *)length_str);
188
189
result_clock[0] = result_clock[1] = 0ULL;
190
result_bps[0] = result_bps[1] = 0.0;
191
192
if ((s64)len <= 0) {
193
fprintf(stderr, "Invalid length:%s\n", length_str);
194
return 1;
195
}
196
197
/* same to without specifying either of prefault and no-prefault */
198
if (only_prefault && no_prefault)
199
only_prefault = no_prefault = false;
200
201
for (i = 0; routines[i].name; i++) {
202
if (!strcmp(routines[i].name, routine))
203
break;
204
}
205
if (!routines[i].name) {
206
printf("Unknown routine:%s\n", routine);
207
printf("Available routines...\n");
208
for (i = 0; routines[i].name; i++) {
209
printf("\t%s ... %s\n",
210
routines[i].name, routines[i].desc);
211
}
212
return 1;
213
}
214
215
if (bench_format == BENCH_FORMAT_DEFAULT)
216
printf("# Copying %s Bytes ...\n\n", length_str);
217
218
if (!only_prefault && !no_prefault) {
219
/* show both of results */
220
if (use_clock) {
221
result_clock[0] =
222
do_memcpy_clock(routines[i].fn, len, false);
223
result_clock[1] =
224
do_memcpy_clock(routines[i].fn, len, true);
225
} else {
226
result_bps[0] =
227
do_memcpy_gettimeofday(routines[i].fn,
228
len, false);
229
result_bps[1] =
230
do_memcpy_gettimeofday(routines[i].fn,
231
len, true);
232
}
233
} else {
234
if (use_clock) {
235
result_clock[pf] =
236
do_memcpy_clock(routines[i].fn,
237
len, only_prefault);
238
} else {
239
result_bps[pf] =
240
do_memcpy_gettimeofday(routines[i].fn,
241
len, only_prefault);
242
}
243
}
244
245
switch (bench_format) {
246
case BENCH_FORMAT_DEFAULT:
247
if (!only_prefault && !no_prefault) {
248
if (use_clock) {
249
printf(" %14lf Clock/Byte\n",
250
(double)result_clock[0]
251
/ (double)len);
252
printf(" %14lf Clock/Byte (with prefault)\n",
253
(double)result_clock[1]
254
/ (double)len);
255
} else {
256
print_bps(result_bps[0]);
257
printf("\n");
258
print_bps(result_bps[1]);
259
printf(" (with prefault)\n");
260
}
261
} else {
262
if (use_clock) {
263
printf(" %14lf Clock/Byte",
264
(double)result_clock[pf]
265
/ (double)len);
266
} else
267
print_bps(result_bps[pf]);
268
269
printf("%s\n", only_prefault ? " (with prefault)" : "");
270
}
271
break;
272
case BENCH_FORMAT_SIMPLE:
273
if (!only_prefault && !no_prefault) {
274
if (use_clock) {
275
printf("%lf %lf\n",
276
(double)result_clock[0] / (double)len,
277
(double)result_clock[1] / (double)len);
278
} else {
279
printf("%lf %lf\n",
280
result_bps[0], result_bps[1]);
281
}
282
} else {
283
if (use_clock) {
284
printf("%lf\n", (double)result_clock[pf]
285
/ (double)len);
286
} else
287
printf("%lf\n", result_bps[pf]);
288
}
289
break;
290
default:
291
/* reaching this means there's some disaster: */
292
die("unknown format: %d\n", bench_format);
293
break;
294
}
295
296
return 0;
297
}
298
299