GitHub Repository: torvalds/linux
Path: tools/lib/bpf/libbpf.c
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <[email protected]>
 * Copyright (C) 2015 Wang Nan <[email protected]>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "zip.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
#endif

#define MAX_EVENT_NAME_LEN 64

#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vsprintf() in __base_pr() uses nonliteral format string. It may break
 * compilation if user enables corresponding warning. Disable it explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b) __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
static int map_set_def_max_entries(struct bpf_map *map);

static const char * const attach_type_name[] = {
	[BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
	[BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
	[BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
	[BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
	[BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
	[BPF_CGROUP_DEVICE] = "cgroup_device",
	[BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
	[BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
	[BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
	[BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
	[BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
	[BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
	[BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
	[BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
	[BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
	[BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
	[BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
	[BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
	[BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
	[BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
	[BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
	[BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
	[BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
	[BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
	[BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
	[BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
	[BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
	[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
	[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
	[BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
	[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
	[BPF_LIRC_MODE2] = "lirc_mode2",
	[BPF_FLOW_DISSECTOR] = "flow_dissector",
	[BPF_TRACE_RAW_TP] = "trace_raw_tp",
	[BPF_TRACE_FENTRY] = "trace_fentry",
	[BPF_TRACE_FEXIT] = "trace_fexit",
	[BPF_MODIFY_RETURN] = "modify_return",
	[BPF_LSM_MAC] = "lsm_mac",
	[BPF_LSM_CGROUP] = "lsm_cgroup",
	[BPF_SK_LOOKUP] = "sk_lookup",
	[BPF_TRACE_ITER] = "trace_iter",
	[BPF_XDP_DEVMAP] = "xdp_devmap",
	[BPF_XDP_CPUMAP] = "xdp_cpumap",
	[BPF_XDP] = "xdp",
	[BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
	[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
	[BPF_PERF_EVENT] = "perf_event",
	[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
	[BPF_STRUCT_OPS] = "struct_ops",
	[BPF_NETFILTER] = "netfilter",
	[BPF_TCX_INGRESS] = "tcx_ingress",
	[BPF_TCX_EGRESS] = "tcx_egress",
	[BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
	[BPF_NETKIT_PRIMARY] = "netkit_primary",
	[BPF_NETKIT_PEER] = "netkit_peer",
	[BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session",
	[BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session",
};

static const char * const link_type_name[] = {
	[BPF_LINK_TYPE_UNSPEC] = "unspec",
	[BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
	[BPF_LINK_TYPE_TRACING] = "tracing",
	[BPF_LINK_TYPE_CGROUP] = "cgroup",
	[BPF_LINK_TYPE_ITER] = "iter",
	[BPF_LINK_TYPE_NETNS] = "netns",
	[BPF_LINK_TYPE_XDP] = "xdp",
	[BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
	[BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
	[BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_LINK_TYPE_NETFILTER] = "netfilter",
	[BPF_LINK_TYPE_TCX] = "tcx",
	[BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
	[BPF_LINK_TYPE_NETKIT] = "netkit",
	[BPF_LINK_TYPE_SOCKMAP] = "sockmap",
};

static const char * const map_type_name[] = {
	[BPF_MAP_TYPE_UNSPEC] = "unspec",
	[BPF_MAP_TYPE_HASH] = "hash",
	[BPF_MAP_TYPE_ARRAY] = "array",
	[BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
	[BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
	[BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
	[BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
	[BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
	[BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
	[BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
	[BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
	[BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
	[BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
	[BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
	[BPF_MAP_TYPE_DEVMAP] = "devmap",
	[BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
	[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
	[BPF_MAP_TYPE_CPUMAP] = "cpumap",
	[BPF_MAP_TYPE_XSKMAP] = "xskmap",
	[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
	[BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
	[BPF_MAP_TYPE_QUEUE] = "queue",
	[BPF_MAP_TYPE_STACK] = "stack",
	[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
	[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
	[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
	[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
	[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
	[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
	[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
	[BPF_MAP_TYPE_ARENA] = "arena",
	[BPF_MAP_TYPE_INSN_ARRAY] = "insn_array",
};

static const char * const prog_type_name[] = {
	[BPF_PROG_TYPE_UNSPEC] = "unspec",
	[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
	[BPF_PROG_TYPE_KPROBE] = "kprobe",
	[BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
	[BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
	[BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
	[BPF_PROG_TYPE_XDP] = "xdp",
	[BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
	[BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
	[BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
	[BPF_PROG_TYPE_LWT_IN] = "lwt_in",
	[BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
	[BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
	[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
	[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
	[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
	[BPF_PROG_TYPE_SK_MSG] = "sk_msg",
	[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
	[BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
	[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
	[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
	[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
	[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
	[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
	[BPF_PROG_TYPE_TRACING] = "tracing",
	[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_PROG_TYPE_EXT] = "ext",
	[BPF_PROG_TYPE_LSM] = "lsm",
	[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
	[BPF_PROG_TYPE_SYSCALL] = "syscall",
	[BPF_PROG_TYPE_NETFILTER] = "netfilter",
};

static int __base_pr(enum libbpf_print_level level, const char *format,
		     va_list args)
{
	const char *env_var = "LIBBPF_LOG_LEVEL";
	static enum libbpf_print_level min_level = LIBBPF_INFO;
	static bool initialized;

	if (!initialized) {
		char *verbosity;

		initialized = true;
		verbosity = getenv(env_var);
		if (verbosity) {
			if (strcasecmp(verbosity, "warn") == 0)
				min_level = LIBBPF_WARN;
			else if (strcasecmp(verbosity, "debug") == 0)
				min_level = LIBBPF_DEBUG;
			else if (strcasecmp(verbosity, "info") == 0)
				min_level = LIBBPF_INFO;
			else
				fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n",
					env_var, verbosity);
		}
	}

	/* if too verbose, skip logging */
	if (level > min_level)
		return 0;

	return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
	libbpf_print_fn_t old_print_fn;

	old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);

	return old_print_fn;
}

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
	va_list args;
	int old_errno;
	libbpf_print_fn_t print_fn;

	print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
	if (!print_fn)
		return;

	old_errno = errno;

	va_start(args, format);
	print_fn(level, format, args);
	va_end(args);

	errno = old_errno;
}
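
/*
 * Usage sketch (illustrative; the callback and its name are hypothetical,
 * only libbpf_set_print() and libbpf_print_fn_t are existing libbpf API):
 * an application can reroute libbpf's log output like this:
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *format, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, format, args);
 *	}
 *
 *	libbpf_set_print(my_print);
 *
 * The previous callback is returned, so it can be restored later.
 */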

static void pr_perm_msg(int err)
{
	struct rlimit limit;
	char buf[100];

	if (err != -EPERM || geteuid() != 0)
		return;

	err = getrlimit(RLIMIT_MEMLOCK, &limit);
	if (err)
		return;

	if (limit.rlim_cur == RLIM_INFINITY)
		return;

	if (limit.rlim_cur < 1024)
		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
	else if (limit.rlim_cur < 1024*1024)
		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
	else
		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
		buf);
}
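
/*
 * Illustrative note: on kernels without memcg-based BPF memory accounting
 * (pre-5.11), map and program memory is charged against RLIMIT_MEMLOCK, so
 * the warning above is typically resolved by raising that limit, e.g.:
 *
 *	ulimit -l unlimited
 *
 * from a shell, or programmatically before using libbpf:
 *
 *	struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
 *	setrlimit(RLIMIT_MEMLOCK, &rl);
 */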

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({			\
	int ___err = 0;			\
	if ((fd) >= 0)			\
		___err = close((fd));	\
	fd = -1;			\
	___err; })
#endif
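
/*
 * Both helpers above rely on the GCC/Clang statement-expression extension
 * (enabled alongside _GNU_SOURCE): zfree() takes the address of the pointer
 * being freed and resets it to NULL, while zclose() closes the descriptor
 * only if it is valid (>= 0), resets it to -1, and evaluates to close()'s
 * return value, so callers can still check for close errors.
 */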

static inline __u64 ptr_to_u64(const void *ptr)
{
	return (__u64) (unsigned long) ptr;
}

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
	/* as of v1.0 libbpf_set_strict_mode() is a no-op */
	return 0;
}

__u32 libbpf_major_version(void)
{
	return LIBBPF_MAJOR_VERSION;
}

__u32 libbpf_minor_version(void)
{
	return LIBBPF_MINOR_VERSION;
}

const char *libbpf_version_string(void)
{
#define __S(X) #X
#define _S(X) __S(X)
	return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
#undef _S
#undef __S
}
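
/*
 * Illustrative expansion of the stringification above: the two macro levels
 * ensure the version macros are expanded before being stringified. Assuming,
 * for example, LIBBPF_MAJOR_VERSION is 1 and LIBBPF_MINOR_VERSION is 6:
 *
 *	_S(LIBBPF_MAJOR_VERSION) -> __S(1) -> "1"
 *
 * so the return expression becomes "v" "1" "." "6", which the compiler
 * concatenates into "v1.6". A single-level #X would instead produce
 * "vLIBBPF_MAJOR_VERSION.LIBBPF_MINOR_VERSION".
 */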

enum reloc_type {
	RELO_LD64,
	RELO_CALL,
	RELO_DATA,
	RELO_EXTERN_LD64,
	RELO_EXTERN_CALL,
	RELO_SUBPROG_ADDR,
	RELO_CORE,
	RELO_INSN_ARRAY,
};

struct reloc_desc {
	enum reloc_type type;
	int insn_idx;
	union {
		const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
		struct {
			int map_idx;
			int sym_off;
			/*
			 * The following two fields can be unionized, as the
			 * ext_idx field is used for extern symbols, and the
			 * sym_size is used for jump tables, which are never
			 * extern
			 */
			union {
				int ext_idx;
				int sym_size;
			};
		};
	};
};

/* stored as sec_def->cookie for all libbpf-supported SEC()s */
enum sec_def_flags {
	SEC_NONE = 0,
	/* expected_attach_type is optional, if kernel doesn't support that */
	SEC_EXP_ATTACH_OPT = 1,
	/* legacy, only used by libbpf_get_type_names() and
	 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
	 * This used to be associated with cgroup (and few other) BPF programs
	 * that were attachable through BPF_PROG_ATTACH command. Pretty
	 * meaningless nowadays, though.
	 */
	SEC_ATTACHABLE = 2,
	SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
	/* attachment target is specified through BTF ID in either kernel or
	 * other BPF program's BTF object
	 */
	SEC_ATTACH_BTF = 4,
	/* BPF program type allows sleeping/blocking in kernel */
	SEC_SLEEPABLE = 8,
	/* BPF program supports non-linear XDP buffer */
	SEC_XDP_FRAGS = 16,
	/* Setup proper attach type for usdt probes. */
	SEC_USDT = 32,
};

struct bpf_sec_def {
	char *sec;
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	long cookie;
	int handler_id;

	libbpf_prog_setup_fn_t prog_setup_fn;
	libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
	libbpf_prog_attach_fn_t prog_attach_fn;
};

struct bpf_light_subprog {
	__u32 sec_insn_off;
	__u32 sub_insn_off;
};

/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 */
struct bpf_program {
	char *name;
	char *sec_name;
	size_t sec_idx;
	const struct bpf_sec_def *sec_def;
	/* this program's instruction offset (in number of instructions)
	 * within its containing ELF section
	 */
	size_t sec_insn_off;
	/* number of original instructions in ELF section belonging to this
	 * program, not taking into account subprogram instructions possibly
	 * appended later during relocation
	 */
	size_t sec_insn_cnt;
	/* Offset (in number of instructions) of the start of instructions
	 * belonging to this BPF program within its containing main BPF
	 * program. For the entry-point (main) BPF program, this is always
	 * zero. For a sub-program, this gets reset before each main BPF
	 * program is processed and relocated and is used to determine
	 * whether the sub-program was already appended to the main program,
	 * and if yes, at which instruction offset.
	 */
	size_t sub_insn_off;

	/* instructions that belong to BPF program; insns[0] is located at
	 * sec_insn_off instruction within its ELF section in ELF file, so
	 * when mapping ELF file instruction index to the local instruction,
	 * one needs to subtract sec_insn_off; and vice versa.
	 */
	struct bpf_insn *insns;
	/* actual number of instructions in this BPF program's image; for
	 * entry-point BPF programs this includes the size of main program
	 * itself plus all the used sub-programs, appended at the end
	 */
	size_t insns_cnt;

	struct reloc_desc *reloc_desc;
	int nr_reloc;

	/* BPF verifier log settings */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	struct bpf_object *obj;

	int fd;
	bool autoload;
	bool autoattach;
	bool sym_global;
	bool mark_btf_static;
	enum bpf_prog_type type;
	enum bpf_attach_type expected_attach_type;
	int exception_cb_idx;

	int prog_ifindex;
	__u32 attach_btf_obj_fd;
	__u32 attach_btf_id;
	__u32 attach_prog_fd;

	void *func_info;
	__u32 func_info_rec_size;
	__u32 func_info_cnt;

	void *line_info;
	__u32 line_info_rec_size;
	__u32 line_info_cnt;
	__u32 prog_flags;
	__u8 hash[SHA256_DIGEST_LENGTH];

	struct bpf_light_subprog *subprogs;
	__u32 subprog_cnt;
};

struct bpf_struct_ops {
	struct bpf_program **progs;
	__u32 *kern_func_off;
	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
	void *data;
	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
	 * btf_vmlinux's format.
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[... some other kernel fields ...]
	 *	struct tcp_congestion_ops data;
	 * }
	 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
	 * from "data".
	 */
	void *kern_vdata;
	__u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
#define ARENA_SEC ".addr_space.1"

enum libbpf_map_type {
	LIBBPF_MAP_UNSPEC,
	LIBBPF_MAP_DATA,
	LIBBPF_MAP_BSS,
	LIBBPF_MAP_RODATA,
	LIBBPF_MAP_KCONFIG,
};

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

struct bpf_map {
	struct bpf_object *obj;
	char *name;
	/* real_name is defined for special internal maps (.rodata*,
	 * .data*, .bss, .kconfig) and preserves their original ELF section
	 * name. This is important to be able to find corresponding BTF
	 * DATASEC information.
	 */
	char *real_name;
	int fd;
	int sec_idx;
	size_t sec_offset;
	int map_ifindex;
	int inner_map_fd;
	struct bpf_map_def def;
	__u32 numa_node;
	__u32 btf_var_idx;
	int mod_btf_fd;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
	__u32 btf_vmlinux_value_type_id;
	enum libbpf_map_type libbpf_type;
	void *mmaped;
	struct bpf_struct_ops *st_ops;
	struct bpf_map *inner_map;
	void **init_slots;
	int init_slots_sz;
	char *pin_path;
	bool pinned;
	bool reused;
	bool autocreate;
	bool autoattach;
	__u64 map_extra;
	struct bpf_program *excl_prog;
};

enum extern_type {
	EXT_UNKNOWN,
	EXT_KCFG,
	EXT_KSYM,
};

enum kcfg_type {
	KCFG_UNKNOWN,
	KCFG_CHAR,
	KCFG_BOOL,
	KCFG_INT,
	KCFG_TRISTATE,
	KCFG_CHAR_ARR,
};

struct extern_desc {
	enum extern_type type;
	int sym_idx;
	int btf_id;
	int sec_btf_id;
	char *name;
	char *essent_name;
	bool is_set;
	bool is_weak;
	union {
		struct {
			enum kcfg_type type;
			int sz;
			int align;
			int data_off;
			bool is_signed;
		} kcfg;
		struct {
			unsigned long long addr;

			/* target btf_id of the corresponding kernel var. */
			int kernel_btf_obj_fd;
			int kernel_btf_id;

			/* local btf_id of the ksym extern's type. */
			__u32 type_id;
			/* BTF fd index to be patched in for insn->off, this is
			 * 0 for vmlinux BTF, index in obj->fd_array for module
			 * BTF
			 */
			__s16 btf_fd_idx;
		} ksym;
	};
};

struct module_btf {
	struct btf *btf;
	char *name;
	__u32 id;
	int fd;
	int fd_array_idx;
};

enum sec_type {
	SEC_UNUSED = 0,
	SEC_RELO,
	SEC_BSS,
	SEC_DATA,
	SEC_RODATA,
	SEC_ST_OPS,
};

struct elf_sec_desc {
	enum sec_type sec_type;
	Elf64_Shdr *shdr;
	Elf_Data *data;
};

struct elf_state {
	int fd;
	const void *obj_buf;
	size_t obj_buf_sz;
	Elf *elf;
	Elf64_Ehdr *ehdr;
	Elf_Data *symbols;
	Elf_Data *arena_data;
	size_t shstrndx; /* section index for section name strings */
	size_t strtabidx;
	struct elf_sec_desc *secs;
	size_t sec_cnt;
	int btf_maps_shndx;
	__u32 btf_maps_sec_btf_id;
	int text_shndx;
	int symbols_shndx;
	bool has_st_ops;
	int arena_data_shndx;
	int jumptables_data_shndx;
};

struct usdt_manager;

enum bpf_object_state {
	OBJ_OPEN,
	OBJ_PREPARED,
	OBJ_LOADED,
};

struct bpf_object {
	char name[BPF_OBJ_NAME_LEN];
	char license[64];
	__u32 kern_version;

	enum bpf_object_state state;
	struct bpf_program *programs;
	size_t nr_programs;
	struct bpf_map *maps;
	size_t nr_maps;
	size_t maps_cap;

	char *kconfig;
	struct extern_desc *externs;
	int nr_extern;
	int kconfig_map_idx;

	bool has_subcalls;
	bool has_rodata;

	struct bpf_gen *gen_loader;

	/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
	struct elf_state efile;

	unsigned char byteorder;

	struct btf *btf;
	struct btf_ext *btf_ext;

	/* Parse and load BTF vmlinux if any of the programs in the object need
	 * it at load time.
	 */
	struct btf *btf_vmlinux;
	/* Path to the custom BTF to be used for BPF CO-RE relocations as an
	 * override for vmlinux BTF.
	 */
	char *btf_custom_path;
	/* vmlinux BTF override for CO-RE relocations */
	struct btf *btf_vmlinux_override;
	/* Lazily initialized kernel module BTFs */
	struct module_btf *btf_modules;
	bool btf_modules_loaded;
	size_t btf_module_cnt;
	size_t btf_module_cap;

	/* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	int *fd_array;
	size_t fd_array_cap;
	size_t fd_array_cnt;

	struct usdt_manager *usdt_man;

	int arena_map_idx;
	void *arena_data;
	size_t arena_data_sz;

	void *jumptables_data;
	size_t jumptables_data_sz;

	struct {
		struct bpf_program *prog;
		int sym_off;
		int fd;
	} *jumptable_maps;
	size_t jumptable_map_cnt;

	struct kern_feature_cache *feat_cache;
	char *token_path;
	int token_fd;

	char path[];
};

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);

void bpf_program__unload(struct bpf_program *prog)
{
	if (!prog)
		return;

	zclose(prog->fd);

	zfree(&prog->func_info);
	zfree(&prog->line_info);
	zfree(&prog->subprogs);
}

static void bpf_program__exit(struct bpf_program *prog)
{
	if (!prog)
		return;

	bpf_program__unload(prog);
	zfree(&prog->name);
	zfree(&prog->sec_name);
	zfree(&prog->insns);
	zfree(&prog->reloc_desc);

	prog->nr_reloc = 0;
	prog->insns_cnt = 0;
	prog->sec_idx = -1;
}

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_JMP &&
	       BPF_OP(insn->code) == BPF_CALL &&
	       BPF_SRC(insn->code) == BPF_K &&
	       insn->src_reg == BPF_PSEUDO_CALL &&
	       insn->dst_reg == 0 &&
	       insn->off == 0;
}

static bool is_call_insn(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
	return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
		      const char *name, size_t sec_idx, const char *sec_name,
		      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
			sec_name, name, sec_off, insn_data_sz);
		return -EINVAL;
	}

	memset(prog, 0, sizeof(*prog));
	prog->obj = obj;

	prog->sec_idx = sec_idx;
	prog->sec_insn_off = sec_off / BPF_INSN_SZ;
	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
	/* insns_cnt can later be increased by appending used subprograms */
	prog->insns_cnt = prog->sec_insn_cnt;

	prog->type = BPF_PROG_TYPE_UNSPEC;
	prog->fd = -1;
	prog->exception_cb_idx = -1;

	/* libbpf's convention for SEC("?abc...") is that it's just like
	 * SEC("abc...") but the corresponding bpf_program starts out with
	 * autoload set to false.
	 */
	if (sec_name[0] == '?') {
		prog->autoload = false;
		/* from now on forget there was ? in section name */
		sec_name++;
	} else {
		prog->autoload = true;
	}

	prog->autoattach = true;

	/* inherit object's log_level */
	prog->log_level = obj->log_level;

	prog->sec_name = strdup(sec_name);
	if (!prog->sec_name)
		goto errout;

	prog->name = strdup(name);
	if (!prog->name)
		goto errout;

	prog->insns = malloc(insn_data_sz);
	if (!prog->insns)
		goto errout;
	memcpy(prog->insns, insn_data, insn_data_sz);

	return 0;
errout:
	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
	bpf_program__exit(prog);
	return -ENOMEM;
}
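
/*
 * Illustrative example of the SEC("?...") convention handled above (the
 * program name and attach target are hypothetical):
 *
 *	SEC("?kprobe/do_sys_openat2")
 *	int handle_openat(struct pt_regs *ctx) { return 0; }
 *
 * is parsed and relocated like any other program, but starts out with
 * autoload == false, so it is skipped at load time unless the caller opts
 * it in first, e.g. with bpf_program__set_autoload(prog, true).
 */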

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
			 const char *sec_name, int sec_idx)
{
	Elf_Data *symbols = obj->efile.symbols;
	struct bpf_program *prog, *progs;
	void *data = sec_data->d_buf;
	size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
	int nr_progs, err, i;
	const char *name;
	Elf64_Sym *sym;

	progs = obj->programs;
	nr_progs = obj->nr_programs;
	nr_syms = symbols->d_size / sizeof(Elf64_Sym);

	for (i = 0; i < nr_syms; i++) {
		sym = elf_sym_by_idx(obj, i);

		if (sym->st_shndx != sec_idx)
			continue;
		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
			continue;

		prog_sz = sym->st_size;
		sec_off = sym->st_value;

		name = elf_sym_str(obj, sym->st_name);
		if (!name) {
			pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {
			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
			pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
			return -ENOTSUP;
		}

		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
		if (!progs) {
			/*
			 * In this case the original obj->programs
			 * is still valid, so don't need special treat for
			 * bpf_close_object().
			 */
			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
				sec_name, name);
			return -ENOMEM;
		}
		obj->programs = progs;

		prog = &progs[nr_progs];

		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
					    sec_off, data + sec_off, prog_sz);
		if (err)
			return err;

		if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
			prog->sym_global = true;

		/* if function is a global/weak symbol, but has restricted
		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
		 * as static to enable more permissive BPF verification mode
		 * with more outside context available to BPF verifier
		 */
		if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
					 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
			prog->mark_btf_static = true;

		nr_progs++;
		obj->nr_programs = nr_progs;
	}

	return 0;
}

static void bpf_object_bswap_progs(struct bpf_object *obj)
{
	struct bpf_program *prog = obj->programs;
	struct bpf_insn *insn;
	int p, i;

	for (p = 0; p < obj->nr_programs; p++, prog++) {
		insn = prog->insns;
		for (i = 0; i < prog->insns_cnt; i++, insn++)
			bpf_insn_bswap(insn);
	}
	pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs);
}

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (btf_member_bit_offset(t, i) == bit_offset)
			return m;
	}

	return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
		    const char *name)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
			return m;
	}

	return NULL;
}

static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
			    __u16 kind, struct btf **res_btf,
			    struct module_btf **res_mod_btf);

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
				   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
			   struct module_btf **mod_btf,
			   const struct btf_type **type, __u32 *type_id,
			   const struct btf_type **vtype, __u32 *vtype_id,
			   const struct btf_member **data_member)
{
	const struct btf_type *kern_type, *kern_vtype;
	const struct btf_member *kern_data_member;
	struct btf *btf = NULL;
	__s32 kern_vtype_id, kern_type_id;
	char tname[192], stname[256];
	__u32 i;

	snprintf(tname, sizeof(tname), "%.*s",
		 (int)bpf_core_essential_name_len(tname_raw), tname_raw);

	snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);

	/* Look for the corresponding "map_value" type that will be used
	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf
	 * and the mod_btf.
	 * For example, find "struct bpf_struct_ops_tcp_congestion_ops".
	 */
	kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
	if (kern_vtype_id < 0) {
		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
		return kern_vtype_id;
	}
	kern_vtype = btf__type_by_id(btf, kern_vtype_id);

	kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
	if (kern_type_id < 0) {
		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
		return kern_type_id;
	}
	kern_type = btf__type_by_id(btf, kern_type_id);

	/* Find "struct tcp_congestion_ops" from
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[ ... ]
	 *	struct tcp_congestion_ops data;
	 * }
	 */
	kern_data_member = btf_members(kern_vtype);
	for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
		if (kern_data_member->type == kern_type_id)
			break;
	}
	if (i == btf_vlen(kern_vtype)) {
		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
			tname, stname);
		return -EINVAL;
	}

	*type = kern_type;
	*type_id = kern_type_id;
	*vtype = kern_vtype;
	*vtype_id = kern_vtype_id;
	*data_member = kern_data_member;

	return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
	return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

static bool is_valid_st_ops_program(struct bpf_object *obj,
				    const struct bpf_program *prog)
{
	int i;

	for (i = 0; i < obj->nr_programs; i++) {
		if (&obj->programs[i] == prog)
			return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
	}

	return false;
}

/* For each struct_ops program P, referenced from some struct_ops map M,
 * enable P.autoload if there are Ms for which M.autocreate is true,
 * disable P.autoload if for all Ms M.autocreate is false.
 * Don't change P.autoload for programs that are not referenced from any maps.
 */
static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
{
	struct bpf_program *prog, *slot_prog;
	struct bpf_map *map;
	int i, j, k, vlen;

	for (i = 0; i < obj->nr_programs; ++i) {
		int should_load = false;
		int use_cnt = 0;

		prog = &obj->programs[i];
		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
			continue;

		for (j = 0; j < obj->nr_maps; ++j) {
			const struct btf_type *type;

			map = &obj->maps[j];
			if (!bpf_map__is_struct_ops(map))
				continue;

			type = btf__type_by_id(obj->btf, map->st_ops->type_id);
			vlen = btf_vlen(type);
			for (k = 0; k < vlen; ++k) {
				slot_prog = map->st_ops->progs[k];
				if (prog != slot_prog)
					continue;

				use_cnt++;
				if (map->autocreate)
					should_load = true;
			}
		}
		if (use_cnt)
			prog->autoload = should_load;
	}

	return 0;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
{
	const struct btf_member *member, *kern_member, *kern_data_member;
	const struct btf_type *type, *kern_type, *kern_vtype;
	__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
	struct bpf_object *obj = map->obj;
	const struct btf *btf = obj->btf;
	struct bpf_struct_ops *st_ops;
	const struct btf *kern_btf;
	struct module_btf *mod_btf = NULL;
	void *data, *kern_data;
	const char *tname;
	int err;

	st_ops = map->st_ops;
	type = btf__type_by_id(btf, st_ops->type_id);
	tname = btf__name_by_offset(btf, type->name_off);
	err = find_struct_ops_kern_types(obj, tname, &mod_btf,
					 &kern_type, &kern_type_id,
					 &kern_vtype, &kern_vtype_id,
					 &kern_data_member);
	if (err)
		return err;

	kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;

	pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
		 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

	map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
	map->def.value_size = kern_vtype->size;
	map->btf_vmlinux_value_type_id = kern_vtype_id;

	st_ops->kern_vdata = calloc(1, kern_vtype->size);
	if (!st_ops->kern_vdata)
		return -ENOMEM;

	data = st_ops->data;
	kern_data_off = kern_data_member->offset / 8;
	kern_data = st_ops->kern_vdata + kern_data_off;

	member = btf_members(type);
	for (i = 0; i < btf_vlen(type); i++, member++) {
		const struct btf_type *mtype, *kern_mtype;
		__u32 mtype_id, kern_mtype_id;
		void *mdata, *kern_mdata;
		struct bpf_program *prog;
		__s64 msize, kern_msize;
		__u32 moff, kern_moff;
		__u32 kern_member_idx;
		const char *mname;

		mname = btf__name_by_offset(btf, member->name_off);
		moff = member->offset / 8;
		mdata = data + moff;
		msize = btf__resolve_size(btf, member->type);
		if (msize < 0) {
			pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n",
				map->name, mname);
			return msize;
		}

		kern_member = find_member_by_name(kern_btf, kern_type, mname);
		if (!kern_member) {
			if (!libbpf_is_mem_zeroed(mdata, msize)) {
				pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
					map->name, mname);
				return -ENOTSUP;
			}

			if (st_ops->progs[i]) {
				/* If we had declaratively set struct_ops callback, we need to
				 * force its autoload to false, because it doesn't have
				 * a chance of succeeding from POV of the current struct_ops map.
				 * If this program is still referenced somewhere else, though,
				 * then bpf_object_adjust_struct_ops_autoload() will update its
				 * autoload accordingly.
				 */
				st_ops->progs[i]->autoload = false;
				st_ops->progs[i] = NULL;
			}

			/* Skip all-zero/NULL fields if they are not present in the kernel BTF */
			pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n",
				map->name, mname);
			continue;
		}

		kern_member_idx = kern_member - btf_members(kern_type);
		if (btf_member_bitfield_size(type, i) ||
		    btf_member_bitfield_size(kern_type, kern_member_idx)) {
			pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
				map->name, mname);
			return -ENOTSUP;
		}

		kern_moff = kern_member->offset / 8;
		kern_mdata = kern_data + kern_moff;

		mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
		kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
						    &kern_mtype_id);
		if (BTF_INFO_KIND(mtype->info) !=
		    BTF_INFO_KIND(kern_mtype->info)) {
			pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
				map->name, mname, BTF_INFO_KIND(mtype->info),
				BTF_INFO_KIND(kern_mtype->info));
			return -ENOTSUP;
		}

		if (btf_is_ptr(mtype)) {
			prog = *(void **)mdata;
			/* just like for !kern_member case above, reset declaratively
			 * set (at compile time) program's autoload to false,
			 * if user replaced it with another program or NULL
			 */
			if (st_ops->progs[i] && st_ops->progs[i] != prog)
				st_ops->progs[i]->autoload = false;

			/* Update the value from the shadow type */
			st_ops->progs[i] = prog;
			if (!prog)
				continue;

			if (!is_valid_st_ops_program(obj, prog)) {
				pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
					map->name, mname);
				return -ENOTSUP;
			}

			kern_mtype = skip_mods_and_typedefs(kern_btf,
							    kern_mtype->type,
							    &kern_mtype_id);

			/* mtype->type must be a func_proto which was
			 * guaranteed in bpf_object__collect_st_ops_relos(),
			 * so only check kern_mtype for func_proto here.
			 */
			if (!btf_is_func_proto(kern_mtype)) {
				pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
					map->name, mname);
				return -ENOTSUP;
			}

			if (mod_btf)
				prog->attach_btf_obj_fd = mod_btf->fd;

			/* if we haven't yet processed this BPF program, record proper
			 * attach_btf_id and member_idx
			 */
			if (!prog->attach_btf_id) {
				prog->attach_btf_id = kern_type_id;
				prog->expected_attach_type = kern_member_idx;
			}

			/* struct_ops BPF prog can be re-used between multiple
			 * .struct_ops & .struct_ops.link as long as it's the
			 * same struct_ops struct definition and the same
			 * function pointer field
			 */
			if (prog->attach_btf_id != kern_type_id) {
				pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
					map->name, mname, prog->name, prog->sec_name, prog->type,
					prog->attach_btf_id, kern_type_id);
				return -EINVAL;
			}
			if (prog->expected_attach_type != kern_member_idx) {
				pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
					map->name, mname, prog->name, prog->sec_name, prog->type,
					prog->expected_attach_type, kern_member_idx);
				return -EINVAL;
			}

			st_ops->kern_func_off[i] = kern_data_off + kern_moff;

			pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
				 map->name, mname, prog->name, moff,
				 kern_moff);

			continue;
		}

		kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
		if (kern_msize < 0 || msize != kern_msize) {
			pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
				map->name, mname, (ssize_t)msize,
				(ssize_t)kern_msize);
			return -ENOTSUP;
		}

		pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
			 map->name, mname, (unsigned int)msize,
			 moff, kern_moff);
		memcpy(kern_mdata, mdata, msize);
	}

	return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
	struct bpf_map *map;
	size_t i;
	int err;

	for (i = 0; i < obj->nr_maps; i++) {
		map = &obj->maps[i];

		if (!bpf_map__is_struct_ops(map))
			continue;

		if (!map->autocreate)
			continue;

		err = bpf_map__init_kern_struct_ops(map);
		if (err)
			return err;
	}

	return 0;
}

static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
				int shndx, Elf_Data *data)
{
	const struct btf_type *type, *datasec;
	const struct btf_var_secinfo *vsi;
	struct bpf_struct_ops *st_ops;
	const char *tname, *var_name;
	__s32 type_id, datasec_id;
	const struct btf *btf;
	struct bpf_map *map;
	__u32 i;

	if (shndx == -1)
		return 0;

	btf = obj->btf;
	datasec_id = btf__find_by_name_kind(btf, sec_name,
					    BTF_KIND_DATASEC);
	if (datasec_id < 0) {
		pr_warn("struct_ops init: DATASEC %s not found\n",
			sec_name);
		return -EINVAL;
	}

	datasec = btf__type_by_id(btf, datasec_id);
	vsi = btf_var_secinfos(datasec);
	for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
		type = btf__type_by_id(obj->btf, vsi->type);
		var_name = btf__name_by_offset(obj->btf, type->name_off);

		type_id = btf__resolve_type(obj->btf, vsi->type);
		if (type_id < 0) {
			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
				vsi->type, sec_name);
			return -EINVAL;
		}

		type = btf__type_by_id(obj->btf, type_id);
		tname = btf__name_by_offset(obj->btf, type->name_off);
		if (!tname[0]) {
			pr_warn("struct_ops init: anonymous type is not supported\n");
			return -ENOTSUP;
		}
		if (!btf_is_struct(type)) {
			pr_warn("struct_ops init: %s is not a struct\n", tname);
			return -EINVAL;
		}

		map = bpf_object__add_map(obj);
		if (IS_ERR(map))
			return PTR_ERR(map);

		map->sec_idx = shndx;
		map->sec_offset = vsi->offset;
		map->name = strdup(var_name);
		if (!map->name)
			return -ENOMEM;
		map->btf_value_type_id = type_id;

		/* Follow same convention as for programs autoload:
		 * SEC("?.struct_ops") means map is not created by default.
		 */
		if (sec_name[0] == '?') {
			map->autocreate = false;
			/* from now on forget there was ? in section name */
			sec_name++;
		}

		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
		map->def.key_size = sizeof(int);
		map->def.value_size = type->size;
		map->def.max_entries = 1;
		map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
		map->autoattach = true;

		map->st_ops = calloc(1, sizeof(*map->st_ops));
		if (!map->st_ops)
			return -ENOMEM;
		st_ops = map->st_ops;
		st_ops->data = malloc(type->size);
		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
		st_ops->kern_func_off = malloc(btf_vlen(type) *
					       sizeof(*st_ops->kern_func_off));
		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
			return -ENOMEM;

		if (vsi->offset + type->size > data->d_size) {
			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
				var_name, sec_name);
			return -EINVAL;
		}

		memcpy(st_ops->data,
		       data->d_buf + vsi->offset,
		       type->size);
		st_ops->type_id = type_id;

		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
			 tname, type_id, var_name, vsi->offset);
	}

	return 0;
}
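
/*
 * Illustrative example of the '?' convention for struct_ops maps handled
 * above (the variable name is hypothetical):
 *
 *	SEC("?.struct_ops.link")
 *	struct tcp_congestion_ops my_cong_ops = { ... };
 *
 * starts with autocreate == false and is only created in the kernel if the
 * caller enables it before load, e.g. with bpf_map__set_autocreate(map, true);
 * bpf_object_adjust_struct_ops_autoload() then propagates that choice to the
 * programs referenced from the map.
 */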

static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
	const char *sec_name;
	int sec_idx, err;

	for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
		struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];

		if (desc->sec_type != SEC_ST_OPS)
			continue;

		sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
		if (!sec_name)
			return -LIBBPF_ERRNO__FORMAT;

		err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
		if (err)
			return err;
	}

	return 0;
}

static struct bpf_object *bpf_object__new(const char *path,
					  const void *obj_buf,
					  size_t obj_buf_sz,
					  const char *obj_name)
{
	struct bpf_object *obj;
	char *end;

	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
	if (!obj) {
		pr_warn("alloc memory failed for %s\n", path);
		return ERR_PTR(-ENOMEM);
	}

	strcpy(obj->path, path);
	if (obj_name) {
		libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
	} else {
		/* Using basename() GNU version which doesn't modify arg. */
		libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
		end = strchr(obj->name, '.');
		if (end)
			*end = 0;
	}

	obj->efile.fd = -1;
	/*
	 * Caller of this function should also call
	 * bpf_object__elf_finish() after data collection to return
	 * obj_buf to user. If not, we should duplicate the buffer to
	 * avoid user freeing them before elf finish.
	 */
	obj->efile.obj_buf = obj_buf;
	obj->efile.obj_buf_sz = obj_buf_sz;
	obj->efile.btf_maps_shndx = -1;
	obj->kconfig_map_idx = -1;
	obj->arena_map_idx = -1;

	obj->kern_version = get_kernel_version();
	obj->state = OBJ_OPEN;

	return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
	if (!obj->efile.elf)
		return;

	elf_end(obj->efile.elf);
	obj->efile.elf = NULL;
	obj->efile.ehdr = NULL;
	obj->efile.symbols = NULL;
	obj->efile.arena_data = NULL;

	zfree(&obj->efile.secs);
	obj->efile.sec_cnt = 0;
	zclose(obj->efile.fd);
	obj->efile.obj_buf = NULL;
	obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
	Elf64_Ehdr *ehdr;
	int err = 0;
	Elf *elf;

	if (obj->efile.elf) {
		pr_warn("elf: init internal error\n");
		return -LIBBPF_ERRNO__LIBELF;
	}

	if (obj->efile.obj_buf_sz > 0) {
		/* obj_buf should have been validated by bpf_object__open_mem(). */
		elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
	} else {
		obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
		if (obj->efile.fd < 0) {
			err = -errno;
			pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err));
			return err;
		}

		elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
	}

	if (!elf) {
		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__LIBELF;
		goto errout;
	}

	obj->efile.elf = elf;

	if (elf_kind(elf) != ELF_K_ELF) {
		err = -LIBBPF_ERRNO__FORMAT;
		pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
		goto errout;
	}

	if (gelf_getclass(elf) != ELFCLASS64) {
		err = -LIBBPF_ERRNO__FORMAT;
		pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
		goto errout;
	}

	obj->efile.ehdr = ehdr = elf64_getehdr(elf);
	if (!obj->efile.ehdr) {
		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* Validate ELF object endianness... */
	if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
	    ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
		err = -LIBBPF_ERRNO__ENDIAN;
		pr_warn("elf: '%s' has unknown byte order\n", obj->path);
		goto errout;
	}
	/* and save after bpf_object_open() frees ELF data */
	obj->byteorder = ehdr->e_ident[EI_DATA];

	if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
		pr_warn("elf: failed to get section names section index for %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* ELF is corrupted/truncated, avoid calling elf_strptr. */
	if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
		pr_warn("elf: failed to get section names strings from %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* Old LLVM set e_machine to EM_NONE */
	if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	return 0;
errout:
	bpf_object__elf_finish(obj);
	return err;
}

static bool is_native_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	return obj->byteorder == ELFDATA2LSB;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	return obj->byteorder == ELFDATA2MSB;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
	if (!data) {
		pr_warn("invalid license section in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}
	/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
	 * go over allowed ELF data section buffer
	 */
	libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
	pr_debug("license of %s is %s\n", obj->path, obj->license);
	return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
	__u32 kver;

	if (!data || size != sizeof(kver)) {
		pr_warn("invalid kver section in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}
	memcpy(&kver, data, sizeof(kver));
	obj->kern_version = kver;
	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
	return 0;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
		return true;
	return false;
}

static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
{
	Elf_Data *data;
	Elf_Scn *scn;

	if (!name)
		return -EINVAL;

	scn = elf_sec_by_name(obj, name);
	data = elf_sec_data(obj, scn);
	if (data) {
		*size = data->d_size;
		return 0; /* found it */
	}

	return -ENOENT;
}

static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
{
	Elf_Data *symbols = obj->efile.symbols;
	const char *sname;
	size_t si;

	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
		Elf64_Sym *sym = elf_sym_by_idx(obj, si);

		if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
			continue;

		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
			continue;

		sname = elf_sym_str(obj, sym->st_name);
		if (!sname) {
			pr_warn("failed to get sym name string for var %s\n", name);
			return ERR_PTR(-EIO);
		}
		if (strcmp(name, sname) == 0)
			return sym;
	}

	return ERR_PTR(-ENOENT);
}

#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC 0x0001U
#endif
#ifndef MFD_NOEXEC_SEAL
#define MFD_NOEXEC_SEAL 0x0008U
#endif

static int create_placeholder_fd(void)
{
	unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL;
	const char *name = "libbpf-placeholder-fd";
	int fd;

	fd = ensure_good_fd(sys_memfd_create(name, flags));
	if (fd >= 0)
		return fd;
	else if (errno != EINVAL)
		return -errno;

	/* Possibly running on kernel without MFD_NOEXEC_SEAL */
	fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL));
	if (fd < 0)
		return -errno;
	return fd;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
	struct bpf_map *map;
	int err;

	err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
				sizeof(*obj->maps), obj->nr_maps + 1);
	if (err)
		return ERR_PTR(err);

	map = &obj->maps[obj->nr_maps++];
	map->obj = obj;
	/* Preallocate map FD without actually creating BPF map just yet.
	 * These map FD "placeholders" will be reused later without changing
	 * FD value when map is actually created in the kernel.
	 *
	 * This is useful to be able to perform BPF program relocations
	 * without having to create BPF maps before that step. This allows us
	 * to finalize and load BTF very late in BPF object's loading phase,
	 * right before BPF maps have to be created and BPF programs have to
	 * be loaded. By having these map FD placeholders we can perform all
	 * the sanitizations, relocations, and any other adjustments before we
	 * start creating actual BPF kernel objects (BTF, maps, progs).
	 */
	map->fd = create_placeholder_fd();
	if (map->fd < 0)
		return ERR_PTR(map->fd);
	map->inner_map_fd = -1;
	map->autocreate = true;

	return map;
}

static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
	const long page_sz = sysconf(_SC_PAGE_SIZE);
	size_t map_sz;

	map_sz = (size_t)roundup(value_sz, 8) * max_entries;
	map_sz = roundup(map_sz, page_sz);
	return map_sz;
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
	const long page_sz = sysconf(_SC_PAGE_SIZE);

	switch (map->def.type) {
	case BPF_MAP_TYPE_ARRAY:
		return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
	case BPF_MAP_TYPE_ARENA:
		return page_sz * map->def.max_entries;
	default:
		return 0; /* not supported */
	}
}
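
/*
 * Worked example for the sizing above (assuming a 4096-byte page size):
 * an mmap()'able ARRAY map with value_size == 6 and max_entries == 1000
 * rounds each value up to 8 bytes, giving 8 * 1000 == 8000 bytes, which is
 * then rounded up to a page multiple, i.e. an 8192-byte mapping. An ARENA
 * map is simply max_entries pages, e.g. max_entries == 4 maps 16384 bytes.
 */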
1841
1842
static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
1843
{
1844
void *mmaped;
1845
1846
if (!map->mmaped)
1847
return -EINVAL;
1848
1849
if (old_sz == new_sz)
1850
return 0;
1851
1852
mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1853
if (mmaped == MAP_FAILED)
1854
return -errno;
1855
1856
memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
1857
munmap(map->mmaped, old_sz);
1858
map->mmaped = mmaped;
1859
return 0;
1860
}
1861
1862
static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1863
{
1864
char map_name[BPF_OBJ_NAME_LEN], *p;
1865
int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1866
1867
/* This is one of the more confusing parts of libbpf for various
1868
* reasons, some of which are historical. The original idea for naming
1869
* internal names was to include as much of BPF object name prefix as
1870
* possible, so that it can be distinguished from similar internal
1871
* maps of a different BPF object.
1872
* As an example, let's say we have bpf_object named 'my_object_name'
1873
* and internal map corresponding to '.rodata' ELF section. The final
1874
* map name advertised to user and to the kernel will be
1875
* 'my_objec.rodata', taking first 8 characters of object name and
1876
* entire 7 characters of '.rodata'.
1877
* Somewhat confusingly, if internal map ELF section name is shorter
1878
* than 7 characters, e.g., '.bss', we still reserve 7 characters
1879
* for the suffix, even though we only have 4 actual characters, and
1880
* resulting map will be called 'my_objec.bss', not even using all 15
1881
* characters allowed by the kernel. Oh well, at least the truncated
1882
* object name is somewhat consistent in this case. But if the map
1883
* name is '.kconfig', we'll still have entirety of '.kconfig' added
1884
* (8 chars) and thus will be left with only first 7 characters of the
1885
* object name ('my_obje'). Happy guessing, user, that the final map
1886
* name will be "my_obje.kconfig".
1887
* Now, with libbpf starting to support arbitrarily named .rodata.*
1888
* and .data.* data sections, it's possible that ELF section name is
1889
* longer than allowed 15 chars, so we now need to be careful to take
1890
* only up to 15 first characters of ELF name, taking no BPF object
1891
* name characters at all. So '.rodata.abracadabra' will result in
1892
* '.rodata.abracad' kernel and user-visible name.
1893
* We need to keep this convoluted logic intact for .data, .bss and
1894
* .rodata maps, but for new custom .data.custom and .rodata.custom
1895
* maps we use their ELF names as is, not prepending bpf_object name
1896
* in front. We still need to truncate them to 15 characters for the
1897
* kernel. Full name can be recovered for such maps by using DATASEC
1898
* BTF type associated with such map's value type, though.
1899
*/
1900
if (sfx_len >= BPF_OBJ_NAME_LEN)
1901
sfx_len = BPF_OBJ_NAME_LEN - 1;
1902
1903
/* if there are two or more dots in map name, it's a custom dot map */
1904
if (strchr(real_name + 1, '.') != NULL)
1905
pfx_len = 0;
1906
else
1907
pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1908
1909
snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1910
sfx_len, real_name);
1911
1912
/* sanitize map name to characters allowed by kernel */
1913
for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1914
if (!isalnum(*p) && *p != '_' && *p != '.')
1915
*p = '_';
1916
1917
return strdup(map_name);
1918
}
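/* Illustrative outcomes of the rules above, assuming a bpf_object named
* "my_object_name" and BPF_OBJ_NAME_LEN == 16:
*
*   internal_map_name(obj, ".rodata")             -> "my_objec.rodata"
*   internal_map_name(obj, ".bss")                -> "my_objec.bss"
*   internal_map_name(obj, ".kconfig")            -> "my_obje.kconfig"
*   internal_map_name(obj, ".rodata.abracadabra") -> ".rodata.abracad"
*/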
1919
1920
static int
1921
map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1922
1923
/* Internal BPF map is mmap()'able only if at least one of corresponding
1924
* DATASEC's VARs is to be exposed through BPF skeleton. I.e., it's a GLOBAL
1925
* variable and it's not marked as __hidden (which turns it into, effectively,
1926
* a STATIC variable).
1927
*/
1928
static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1929
{
1930
const struct btf_type *t, *vt;
1931
struct btf_var_secinfo *vsi;
1932
int i, n;
1933
1934
if (!map->btf_value_type_id)
1935
return false;
1936
1937
t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1938
if (!btf_is_datasec(t))
1939
return false;
1940
1941
vsi = btf_var_secinfos(t);
1942
for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1943
vt = btf__type_by_id(obj->btf, vsi->type);
1944
if (!btf_is_var(vt))
1945
continue;
1946
1947
if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1948
return true;
1949
}
1950
1951
return false;
1952
}
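/* BPF-side sketch (assumes the __hidden macro from bpf_helpers.h, which
* expands to __attribute__((visibility("hidden")))):
*
*   int exposed_cnt;            - stays a GLOBAL VAR, exposed in skeleton
*   __hidden int internal_cnt;  - demoted to STATIC, not exposed
*
* The containing .data/.bss/.rodata map gets BPF_F_MMAPABLE only if it
* holds at least one variable like 'exposed_cnt'.
*/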
1953
1954
static int
1955
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1956
const char *real_name, int sec_idx, void *data, size_t data_sz)
1957
{
1958
struct bpf_map_def *def;
1959
struct bpf_map *map;
1960
size_t mmap_sz;
1961
int err;
1962
1963
map = bpf_object__add_map(obj);
1964
if (IS_ERR(map))
1965
return PTR_ERR(map);
1966
1967
map->libbpf_type = type;
1968
map->sec_idx = sec_idx;
1969
map->sec_offset = 0;
1970
map->real_name = strdup(real_name);
1971
map->name = internal_map_name(obj, real_name);
1972
if (!map->real_name || !map->name) {
1973
zfree(&map->real_name);
1974
zfree(&map->name);
1975
return -ENOMEM;
1976
}
1977
1978
def = &map->def;
1979
def->type = BPF_MAP_TYPE_ARRAY;
1980
def->key_size = sizeof(int);
1981
def->value_size = data_sz;
1982
def->max_entries = 1;
1983
def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1984
? BPF_F_RDONLY_PROG : 0;
1985
1986
/* failures are fine because of maps like .rodata.str1.1 */
1987
(void) map_fill_btf_type_info(obj, map);
1988
1989
if (map_is_mmapable(obj, map))
1990
def->map_flags |= BPF_F_MMAPABLE;
1991
1992
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1993
map->name, map->sec_idx, map->sec_offset, def->map_flags);
1994
1995
mmap_sz = bpf_map_mmap_sz(map);
1996
map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
1997
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1998
if (map->mmaped == MAP_FAILED) {
1999
err = -errno;
2000
map->mmaped = NULL;
2001
pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err));
2002
zfree(&map->real_name);
2003
zfree(&map->name);
2004
return err;
2005
}
2006
2007
if (data)
2008
memcpy(map->mmaped, data, data_sz);
2009
2010
pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
2011
return 0;
2012
}
2013
2014
static int bpf_object__init_global_data_maps(struct bpf_object *obj)
2015
{
2016
struct elf_sec_desc *sec_desc;
2017
const char *sec_name;
2018
int err = 0, sec_idx;
2019
2020
/*
2021
* Populate obj->maps with libbpf internal maps.
2022
*/
2023
for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
2024
sec_desc = &obj->efile.secs[sec_idx];
2025
2026
/* Skip recognized sections with size 0. */
2027
if (!sec_desc->data || sec_desc->data->d_size == 0)
2028
continue;
2029
2030
switch (sec_desc->sec_type) {
2031
case SEC_DATA:
2032
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
2033
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
2034
sec_name, sec_idx,
2035
sec_desc->data->d_buf,
2036
sec_desc->data->d_size);
2037
break;
2038
case SEC_RODATA:
2039
obj->has_rodata = true;
2040
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
2041
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
2042
sec_name, sec_idx,
2043
sec_desc->data->d_buf,
2044
sec_desc->data->d_size);
2045
break;
2046
case SEC_BSS:
2047
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
2048
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
2049
sec_name, sec_idx,
2050
NULL,
2051
sec_desc->data->d_size);
2052
break;
2053
default:
2054
/* skip */
2055
break;
2056
}
2057
if (err)
2058
return err;
2059
}
2060
return 0;
2061
}
2062
2063
2064
static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
2065
const void *name)
2066
{
2067
int i;
2068
2069
for (i = 0; i < obj->nr_extern; i++) {
2070
if (strcmp(obj->externs[i].name, name) == 0)
2071
return &obj->externs[i];
2072
}
2073
return NULL;
2074
}
2075
2076
static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj,
2077
const void *name, int len)
2078
{
2079
const char *ext_name;
2080
int i;
2081
2082
for (i = 0; i < obj->nr_extern; i++) {
2083
ext_name = obj->externs[i].name;
2084
if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0)
2085
return &obj->externs[i];
2086
}
2087
return NULL;
2088
}
2089
2090
static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
2091
char value)
2092
{
2093
switch (ext->kcfg.type) {
2094
case KCFG_BOOL:
2095
if (value == 'm') {
2096
pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
2097
ext->name, value);
2098
return -EINVAL;
2099
}
2100
*(bool *)ext_val = value == 'y' ? true : false;
2101
break;
2102
case KCFG_TRISTATE:
2103
if (value == 'y')
2104
*(enum libbpf_tristate *)ext_val = TRI_YES;
2105
else if (value == 'm')
2106
*(enum libbpf_tristate *)ext_val = TRI_MODULE;
2107
else /* value == 'n' */
2108
*(enum libbpf_tristate *)ext_val = TRI_NO;
2109
break;
2110
case KCFG_CHAR:
2111
*(char *)ext_val = value;
2112
break;
2113
case KCFG_UNKNOWN:
2114
case KCFG_INT:
2115
case KCFG_CHAR_ARR:
2116
default:
2117
pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
2118
ext->name, value);
2119
return -EINVAL;
2120
}
2121
ext->is_set = true;
2122
return 0;
2123
}
2124
2125
static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
2126
const char *value)
2127
{
2128
size_t len;
2129
2130
if (ext->kcfg.type != KCFG_CHAR_ARR) {
2131
pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
2132
ext->name, value);
2133
return -EINVAL;
2134
}
2135
2136
len = strlen(value);
2137
if (len < 2 || value[len - 1] != '"') {
2138
pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
2139
ext->name, value);
2140
return -EINVAL;
2141
}
2142
2143
/* strip quotes */
2144
len -= 2;
2145
if (len >= ext->kcfg.sz) {
2146
pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
2147
ext->name, value, len, ext->kcfg.sz - 1);
2148
len = ext->kcfg.sz - 1;
2149
}
2150
memcpy(ext_val, value + 1, len);
2151
ext_val[len] = '\0';
2152
ext->is_set = true;
2153
return 0;
2154
}
2155
2156
static int parse_u64(const char *value, __u64 *res)
2157
{
2158
char *value_end;
2159
int err;
2160
2161
errno = 0;
2162
*res = strtoull(value, &value_end, 0);
2163
if (errno) {
2164
err = -errno;
2165
pr_warn("failed to parse '%s': %s\n", value, errstr(err));
2166
return err;
2167
}
2168
if (*value_end) {
2169
pr_warn("failed to parse '%s' as integer completely\n", value);
2170
return -EINVAL;
2171
}
2172
return 0;
2173
}
2174
2175
static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
2176
{
2177
int bit_sz = ext->kcfg.sz * 8;
2178
2179
if (ext->kcfg.sz == 8)
2180
return true;
2181
2182
/* Validate that value stored in u64 fits in integer of `ext->kcfg.sz`
2183
* bytes without any loss of information. If the target integer
2184
* is signed, we rely on the following limits of integer type of
2185
* Y bits and subsequent transformation:
2186
*
2187
* -2^(Y-1) <= X <= 2^(Y-1) - 1
2188
* 0 <= X + 2^(Y-1) <= 2^Y - 1
2189
* 0 <= X + 2^(Y-1) < 2^Y
2190
*
2191
* For unsigned target integer, check that all the (64 - Y) bits are
2192
* zero.
2193
*/
2194
if (ext->kcfg.is_signed)
2195
return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
2196
else
2197
return (v >> bit_sz) == 0;
2198
}
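/* Worked examples (illustrative): for a signed 1-byte target (bit_sz = 8),
* v = (__u64)-128 wraps to 0 after adding 2^7, and 0 < 2^8, so it fits;
* v = 128 becomes 256, which is not < 2^8, so it is rejected. For an
* unsigned 2-byte target, v = 70000 has (v >> 16) == 1, so it is rejected.
*/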
2199
2200
static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
2201
__u64 value)
2202
{
2203
if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
2204
ext->kcfg.type != KCFG_BOOL) {
2205
pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
2206
ext->name, (unsigned long long)value);
2207
return -EINVAL;
2208
}
2209
if (ext->kcfg.type == KCFG_BOOL && value > 1) {
2210
pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
2211
ext->name, (unsigned long long)value);
2212
return -EINVAL;
2213
2214
}
2215
if (!is_kcfg_value_in_range(ext, value)) {
2216
pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
2217
ext->name, (unsigned long long)value, ext->kcfg.sz);
2218
return -ERANGE;
2219
}
2220
switch (ext->kcfg.sz) {
2221
case 1:
2222
*(__u8 *)ext_val = value;
2223
break;
2224
case 2:
2225
*(__u16 *)ext_val = value;
2226
break;
2227
case 4:
2228
*(__u32 *)ext_val = value;
2229
break;
2230
case 8:
2231
*(__u64 *)ext_val = value;
2232
break;
2233
default:
2234
return -EINVAL;
2235
}
2236
ext->is_set = true;
2237
return 0;
2238
}
2239
2240
static int bpf_object__process_kconfig_line(struct bpf_object *obj,
2241
char *buf, void *data)
2242
{
2243
struct extern_desc *ext;
2244
char *sep, *value;
2245
int len, err = 0;
2246
void *ext_val;
2247
__u64 num;
2248
2249
if (!str_has_pfx(buf, "CONFIG_"))
2250
return 0;
2251
2252
sep = strchr(buf, '=');
2253
if (!sep) {
2254
pr_warn("failed to parse '%s': no separator\n", buf);
2255
return -EINVAL;
2256
}
2257
2258
/* Trim ending '\n' */
2259
len = strlen(buf);
2260
if (buf[len - 1] == '\n')
2261
buf[len - 1] = '\0';
2262
/* Split on '=' and ensure that a value is present. */
2263
*sep = '\0';
2264
if (!sep[1]) {
2265
*sep = '=';
2266
pr_warn("failed to parse '%s': no value\n", buf);
2267
return -EINVAL;
2268
}
2269
2270
ext = find_extern_by_name(obj, buf);
2271
if (!ext || ext->is_set)
2272
return 0;
2273
2274
ext_val = data + ext->kcfg.data_off;
2275
value = sep + 1;
2276
2277
switch (*value) {
2278
case 'y': case 'n': case 'm':
2279
err = set_kcfg_value_tri(ext, ext_val, *value);
2280
break;
2281
case '"':
2282
err = set_kcfg_value_str(ext, ext_val, value);
2283
break;
2284
default:
2285
/* assume integer */
2286
err = parse_u64(value, &num);
2287
if (err) {
2288
pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
2289
return err;
2290
}
2291
if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
2292
pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
2293
return -EINVAL;
2294
}
2295
err = set_kcfg_value_num(ext, ext_val, num);
2296
break;
2297
}
2298
if (err)
2299
return err;
2300
pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
2301
return 0;
2302
}
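/* Example input lines and how they are dispatched (illustrative only; the
* CONFIG_* names are placeholders):
*
*   CONFIG_FOO=y               -> set_kcfg_value_tri()
*   CONFIG_BAR="some string"   -> set_kcfg_value_str()
*   CONFIG_BAZ=250             -> parse_u64() + set_kcfg_value_num()
*
* Lines without a CONFIG_ prefix or without a matching extern are ignored.
*/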
2303
2304
static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
2305
{
2306
char buf[PATH_MAX];
2307
struct utsname uts;
2308
int len, err = 0;
2309
gzFile file;
2310
2311
uname(&uts);
2312
len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
2313
if (len < 0)
2314
return -EINVAL;
2315
else if (len >= PATH_MAX)
2316
return -ENAMETOOLONG;
2317
2318
/* gzopen also accepts uncompressed files. */
2319
file = gzopen(buf, "re");
2320
if (!file)
2321
file = gzopen("/proc/config.gz", "re");
2322
2323
if (!file) {
2324
pr_warn("failed to open system Kconfig\n");
2325
return -ENOENT;
2326
}
2327
2328
while (gzgets(file, buf, sizeof(buf))) {
2329
err = bpf_object__process_kconfig_line(obj, buf, data);
2330
if (err) {
2331
pr_warn("error parsing system Kconfig line '%s': %s\n",
2332
buf, errstr(err));
2333
goto out;
2334
}
2335
}
2336
2337
out:
2338
gzclose(file);
2339
return err;
2340
}
2341
2342
static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
2343
const char *config, void *data)
2344
{
2345
char buf[PATH_MAX];
2346
int err = 0;
2347
FILE *file;
2348
2349
file = fmemopen((void *)config, strlen(config), "r");
2350
if (!file) {
2351
err = -errno;
2352
pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err));
2353
return err;
2354
}
2355
2356
while (fgets(buf, sizeof(buf), file)) {
2357
err = bpf_object__process_kconfig_line(obj, buf, data);
2358
if (err) {
2359
pr_warn("error parsing in-memory Kconfig line '%s': %s\n",
2360
buf, errstr(err));
2361
break;
2362
}
2363
}
2364
2365
fclose(file);
2366
return err;
2367
}
2368
2369
static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2370
{
2371
struct extern_desc *last_ext = NULL, *ext;
2372
size_t map_sz;
2373
int i, err;
2374
2375
for (i = 0; i < obj->nr_extern; i++) {
2376
ext = &obj->externs[i];
2377
if (ext->type == EXT_KCFG)
2378
last_ext = ext;
2379
}
2380
2381
if (!last_ext)
2382
return 0;
2383
2384
map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2385
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2386
".kconfig", obj->efile.symbols_shndx,
2387
NULL, map_sz);
2388
if (err)
2389
return err;
2390
2391
obj->kconfig_map_idx = obj->nr_maps - 1;
2392
2393
return 0;
2394
}
2395
2396
const struct btf_type *
2397
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2398
{
2399
const struct btf_type *t = btf__type_by_id(btf, id);
2400
2401
if (res_id)
2402
*res_id = id;
2403
2404
while (btf_is_mod(t) || btf_is_typedef(t)) {
2405
if (res_id)
2406
*res_id = t->type;
2407
t = btf__type_by_id(btf, t->type);
2408
}
2409
2410
return t;
2411
}
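/* Illustrative example: for a BPF-side declaration like
*
*   typedef const volatile int my_cv_int_t;   (hypothetical typedef)
*   my_cv_int_t x;
*
* skip_mods_and_typedefs(btf, <ID of my_cv_int_t>, &res_id) walks
* TYPEDEF -> CONST -> VOLATILE -> INT, returns the INT type and sets
* res_id to the INT type's ID.
*/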
2412
2413
static const struct btf_type *
2414
resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2415
{
2416
const struct btf_type *t;
2417
2418
t = skip_mods_and_typedefs(btf, id, NULL);
2419
if (!btf_is_ptr(t))
2420
return NULL;
2421
2422
t = skip_mods_and_typedefs(btf, t->type, res_id);
2423
2424
return btf_is_func_proto(t) ? t : NULL;
2425
}
2426
2427
static const char *__btf_kind_str(__u16 kind)
2428
{
2429
switch (kind) {
2430
case BTF_KIND_UNKN: return "void";
2431
case BTF_KIND_INT: return "int";
2432
case BTF_KIND_PTR: return "ptr";
2433
case BTF_KIND_ARRAY: return "array";
2434
case BTF_KIND_STRUCT: return "struct";
2435
case BTF_KIND_UNION: return "union";
2436
case BTF_KIND_ENUM: return "enum";
2437
case BTF_KIND_FWD: return "fwd";
2438
case BTF_KIND_TYPEDEF: return "typedef";
2439
case BTF_KIND_VOLATILE: return "volatile";
2440
case BTF_KIND_CONST: return "const";
2441
case BTF_KIND_RESTRICT: return "restrict";
2442
case BTF_KIND_FUNC: return "func";
2443
case BTF_KIND_FUNC_PROTO: return "func_proto";
2444
case BTF_KIND_VAR: return "var";
2445
case BTF_KIND_DATASEC: return "datasec";
2446
case BTF_KIND_FLOAT: return "float";
2447
case BTF_KIND_DECL_TAG: return "decl_tag";
2448
case BTF_KIND_TYPE_TAG: return "type_tag";
2449
case BTF_KIND_ENUM64: return "enum64";
2450
default: return "unknown";
2451
}
2452
}
2453
2454
const char *btf_kind_str(const struct btf_type *t)
2455
{
2456
return __btf_kind_str(btf_kind(t));
2457
}
2458
2459
/*
2460
* Fetch integer attribute of BTF map definition. Such attributes are
2461
* represented using a pointer to an array, in which dimensionality of array
2462
* encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2463
* encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2464
* type definition, while using only sizeof(void *) space in ELF data section.
2465
*/
2466
static bool get_map_field_int(const char *map_name, const struct btf *btf,
2467
const struct btf_member *m, __u32 *res)
2468
{
2469
const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2470
const char *name = btf__name_by_offset(btf, m->name_off);
2471
const struct btf_array *arr_info;
2472
const struct btf_type *arr_t;
2473
2474
if (!btf_is_ptr(t)) {
2475
pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2476
map_name, name, btf_kind_str(t));
2477
return false;
2478
}
2479
2480
arr_t = btf__type_by_id(btf, t->type);
2481
if (!arr_t) {
2482
pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2483
map_name, name, t->type);
2484
return false;
2485
}
2486
if (!btf_is_array(arr_t)) {
2487
pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2488
map_name, name, btf_kind_str(arr_t));
2489
return false;
2490
}
2491
arr_info = btf_array(arr_t);
2492
*res = arr_info->nelems;
2493
return true;
2494
}
2495
2496
static bool get_map_field_long(const char *map_name, const struct btf *btf,
2497
const struct btf_member *m, __u64 *res)
2498
{
2499
const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2500
const char *name = btf__name_by_offset(btf, m->name_off);
2501
2502
if (btf_is_ptr(t)) {
2503
__u32 res32;
2504
bool ret;
2505
2506
ret = get_map_field_int(map_name, btf, m, &res32);
2507
if (ret)
2508
*res = (__u64)res32;
2509
return ret;
2510
}
2511
2512
if (!btf_is_enum(t) && !btf_is_enum64(t)) {
2513
pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n",
2514
map_name, name, btf_kind_str(t));
2515
return false;
2516
}
2517
2518
if (btf_vlen(t) != 1) {
2519
pr_warn("map '%s': attr '%s': invalid __ulong\n",
2520
map_name, name);
2521
return false;
2522
}
2523
2524
if (btf_is_enum(t)) {
2525
const struct btf_enum *e = btf_enum(t);
2526
2527
*res = e->val;
2528
} else {
2529
const struct btf_enum64 *e = btf_enum64(t);
2530
2531
*res = btf_enum64_value(e);
2532
}
2533
return true;
2534
}
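/* These two getters correspond to how map attributes are typically encoded
* on the BPF side (a sketch, assuming the __uint()/__ulong() macros from
* bpf_helpers.h):
*
*   __uint(max_entries, 4096);   - expands to int (*max_entries)[4096],
*                                  handled by get_map_field_int()
*   __ulong(map_extra, 42);      - encoded via a single-value enum,
*                                  handled by get_map_field_long()
*/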
2535
2536
static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2537
{
2538
int len;
2539
2540
len = snprintf(buf, buf_sz, "%s/%s", path, name);
2541
if (len < 0)
2542
return -EINVAL;
2543
if (len >= buf_sz)
2544
return -ENAMETOOLONG;
2545
2546
return 0;
2547
}
2548
2549
static int build_map_pin_path(struct bpf_map *map, const char *path)
2550
{
2551
char buf[PATH_MAX];
2552
int err;
2553
2554
if (!path)
2555
path = BPF_FS_DEFAULT_PATH;
2556
2557
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2558
if (err)
2559
return err;
2560
2561
return bpf_map__set_pin_path(map, buf);
2562
}
2563
2564
/* should match definition in bpf_helpers.h */
2565
enum libbpf_pin_type {
2566
LIBBPF_PIN_NONE,
2567
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2568
LIBBPF_PIN_BY_NAME,
2569
};
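/* Illustrative BPF-side usage of pinning (a sketch assuming bpf_helpers.h
* macros):
*
*   struct {
*       __uint(type, BPF_MAP_TYPE_HASH);
*       __uint(max_entries, 128);
*       __type(key, __u32);
*       __type(value, __u64);
*       __uint(pinning, LIBBPF_PIN_BY_NAME);
*   } pinned_map SEC(".maps");
*
* With no pin_root_path override, build_map_pin_path() above would pin this
* map at "/sys/fs/bpf/pinned_map".
*/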
2570
2571
int parse_btf_map_def(const char *map_name, struct btf *btf,
2572
const struct btf_type *def_t, bool strict,
2573
struct btf_map_def *map_def, struct btf_map_def *inner_def)
2574
{
2575
const struct btf_type *t;
2576
const struct btf_member *m;
2577
bool is_inner = inner_def == NULL;
2578
int vlen, i;
2579
2580
vlen = btf_vlen(def_t);
2581
m = btf_members(def_t);
2582
for (i = 0; i < vlen; i++, m++) {
2583
const char *name = btf__name_by_offset(btf, m->name_off);
2584
2585
if (!name) {
2586
pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2587
return -EINVAL;
2588
}
2589
if (strcmp(name, "type") == 0) {
2590
if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2591
return -EINVAL;
2592
map_def->parts |= MAP_DEF_MAP_TYPE;
2593
} else if (strcmp(name, "max_entries") == 0) {
2594
if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2595
return -EINVAL;
2596
map_def->parts |= MAP_DEF_MAX_ENTRIES;
2597
} else if (strcmp(name, "map_flags") == 0) {
2598
if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2599
return -EINVAL;
2600
map_def->parts |= MAP_DEF_MAP_FLAGS;
2601
} else if (strcmp(name, "numa_node") == 0) {
2602
if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2603
return -EINVAL;
2604
map_def->parts |= MAP_DEF_NUMA_NODE;
2605
} else if (strcmp(name, "key_size") == 0) {
2606
__u32 sz;
2607
2608
if (!get_map_field_int(map_name, btf, m, &sz))
2609
return -EINVAL;
2610
if (map_def->key_size && map_def->key_size != sz) {
2611
pr_warn("map '%s': conflicting key size %u != %u.\n",
2612
map_name, map_def->key_size, sz);
2613
return -EINVAL;
2614
}
2615
map_def->key_size = sz;
2616
map_def->parts |= MAP_DEF_KEY_SIZE;
2617
} else if (strcmp(name, "key") == 0) {
2618
__s64 sz;
2619
2620
t = btf__type_by_id(btf, m->type);
2621
if (!t) {
2622
pr_warn("map '%s': key type [%d] not found.\n",
2623
map_name, m->type);
2624
return -EINVAL;
2625
}
2626
if (!btf_is_ptr(t)) {
2627
pr_warn("map '%s': key spec is not PTR: %s.\n",
2628
map_name, btf_kind_str(t));
2629
return -EINVAL;
2630
}
2631
sz = btf__resolve_size(btf, t->type);
2632
if (sz < 0) {
2633
pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2634
map_name, t->type, (ssize_t)sz);
2635
return sz;
2636
}
2637
if (map_def->key_size && map_def->key_size != sz) {
2638
pr_warn("map '%s': conflicting key size %u != %zd.\n",
2639
map_name, map_def->key_size, (ssize_t)sz);
2640
return -EINVAL;
2641
}
2642
map_def->key_size = sz;
2643
map_def->key_type_id = t->type;
2644
map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2645
} else if (strcmp(name, "value_size") == 0) {
2646
__u32 sz;
2647
2648
if (!get_map_field_int(map_name, btf, m, &sz))
2649
return -EINVAL;
2650
if (map_def->value_size && map_def->value_size != sz) {
2651
pr_warn("map '%s': conflicting value size %u != %u.\n",
2652
map_name, map_def->value_size, sz);
2653
return -EINVAL;
2654
}
2655
map_def->value_size = sz;
2656
map_def->parts |= MAP_DEF_VALUE_SIZE;
2657
} else if (strcmp(name, "value") == 0) {
2658
__s64 sz;
2659
2660
t = btf__type_by_id(btf, m->type);
2661
if (!t) {
2662
pr_warn("map '%s': value type [%d] not found.\n",
2663
map_name, m->type);
2664
return -EINVAL;
2665
}
2666
if (!btf_is_ptr(t)) {
2667
pr_warn("map '%s': value spec is not PTR: %s.\n",
2668
map_name, btf_kind_str(t));
2669
return -EINVAL;
2670
}
2671
sz = btf__resolve_size(btf, t->type);
2672
if (sz < 0) {
2673
pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2674
map_name, t->type, (ssize_t)sz);
2675
return sz;
2676
}
2677
if (map_def->value_size && map_def->value_size != sz) {
2678
pr_warn("map '%s': conflicting value size %u != %zd.\n",
2679
map_name, map_def->value_size, (ssize_t)sz);
2680
return -EINVAL;
2681
}
2682
map_def->value_size = sz;
2683
map_def->value_type_id = t->type;
2684
map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2685
}
2686
else if (strcmp(name, "values") == 0) {
2687
bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2688
bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2689
const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2690
char inner_map_name[128];
2691
int err;
2692
2693
if (is_inner) {
2694
pr_warn("map '%s': multi-level inner maps not supported.\n",
2695
map_name);
2696
return -ENOTSUP;
2697
}
2698
if (i != vlen - 1) {
2699
pr_warn("map '%s': '%s' member should be last.\n",
2700
map_name, name);
2701
return -EINVAL;
2702
}
2703
if (!is_map_in_map && !is_prog_array) {
2704
pr_warn("map '%s': should be map-in-map or prog-array.\n",
2705
map_name);
2706
return -ENOTSUP;
2707
}
2708
if (map_def->value_size && map_def->value_size != 4) {
2709
pr_warn("map '%s': conflicting value size %u != 4.\n",
2710
map_name, map_def->value_size);
2711
return -EINVAL;
2712
}
2713
map_def->value_size = 4;
2714
t = btf__type_by_id(btf, m->type);
2715
if (!t) {
2716
pr_warn("map '%s': %s type [%d] not found.\n",
2717
map_name, desc, m->type);
2718
return -EINVAL;
2719
}
2720
if (!btf_is_array(t) || btf_array(t)->nelems) {
2721
pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2722
map_name, desc);
2723
return -EINVAL;
2724
}
2725
t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2726
if (!btf_is_ptr(t)) {
2727
pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2728
map_name, desc, btf_kind_str(t));
2729
return -EINVAL;
2730
}
2731
t = skip_mods_and_typedefs(btf, t->type, NULL);
2732
if (is_prog_array) {
2733
if (!btf_is_func_proto(t)) {
2734
pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2735
map_name, btf_kind_str(t));
2736
return -EINVAL;
2737
}
2738
continue;
2739
}
2740
if (!btf_is_struct(t)) {
2741
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2742
map_name, btf_kind_str(t));
2743
return -EINVAL;
2744
}
2745
2746
snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2747
err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2748
if (err)
2749
return err;
2750
2751
map_def->parts |= MAP_DEF_INNER_MAP;
2752
} else if (strcmp(name, "pinning") == 0) {
2753
__u32 val;
2754
2755
if (is_inner) {
2756
pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2757
return -EINVAL;
2758
}
2759
if (!get_map_field_int(map_name, btf, m, &val))
2760
return -EINVAL;
2761
if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2762
pr_warn("map '%s': invalid pinning value %u.\n",
2763
map_name, val);
2764
return -EINVAL;
2765
}
2766
map_def->pinning = val;
2767
map_def->parts |= MAP_DEF_PINNING;
2768
} else if (strcmp(name, "map_extra") == 0) {
2769
__u64 map_extra;
2770
2771
if (!get_map_field_long(map_name, btf, m, &map_extra))
2772
return -EINVAL;
2773
map_def->map_extra = map_extra;
2774
map_def->parts |= MAP_DEF_MAP_EXTRA;
2775
} else {
2776
if (strict) {
2777
pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2778
return -ENOTSUP;
2779
}
2780
pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2781
}
2782
}
2783
2784
if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2785
pr_warn("map '%s': map type isn't specified.\n", map_name);
2786
return -EINVAL;
2787
}
2788
2789
return 0;
2790
}
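/* Illustrative map-in-map definition handled by the "values" branch above
* (a sketch assuming bpf_helpers.h macros; names are hypothetical):
*
*   struct inner_map {
*       __uint(type, BPF_MAP_TYPE_ARRAY);
*       __uint(max_entries, 1);
*       __type(key, __u32);
*       __type(value, __u32);
*   };
*
*   struct {
*       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
*       __uint(max_entries, 8);
*       __type(key, __u32);
*       __array(values, struct inner_map);
*   } outer_map SEC(".maps");
*
* parse_btf_map_def() parses 'struct inner_map' recursively into inner_def,
* forces the outer value_size to 4, and sets MAP_DEF_INNER_MAP in ->parts.
*/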
2791
2792
static size_t adjust_ringbuf_sz(size_t sz)
2793
{
2794
__u32 page_sz = sysconf(_SC_PAGE_SIZE);
2795
__u32 mul;
2796
2797
/* if user forgot to set any size, make sure they see error */
2798
if (sz == 0)
2799
return 0;
2800
/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2801
* a power-of-2 multiple of kernel's page size. If user diligently
2802
* satisfied these conditions, pass the size through.
2803
*/
2804
if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2805
return sz;
2806
2807
/* Otherwise find closest (page_sz * power_of_2) product bigger than
2808
* user-set size to satisfy both user size request and kernel
2809
* requirements and substitute correct max_entries for map creation.
2810
*/
2811
for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2812
if (mul * page_sz > sz)
2813
return mul * page_sz;
2814
}
2815
2816
/* if it's impossible to satisfy the conditions (i.e., user size is
2817
* very close to UINT_MAX but is not a power-of-2 multiple of
2818
* page_size) then just return original size and let kernel reject it
2819
*/
2820
return sz;
2821
}
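/* Worked example (illustrative, assuming 4096-byte pages): a requested
* max_entries of 100000 is neither page-aligned nor a power-of-2 multiple
* of the page size, so the loop returns the smallest larger power-of-2
* multiple: 32 * 4096 = 131072. A request of exactly 131072 is passed
* through unchanged by the earlier check.
*/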
2822
2823
static bool map_is_ringbuf(const struct bpf_map *map)
2824
{
2825
return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2826
map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2827
}
2828
2829
static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2830
{
2831
map->def.type = def->map_type;
2832
map->def.key_size = def->key_size;
2833
map->def.value_size = def->value_size;
2834
map->def.max_entries = def->max_entries;
2835
map->def.map_flags = def->map_flags;
2836
map->map_extra = def->map_extra;
2837
2838
map->numa_node = def->numa_node;
2839
map->btf_key_type_id = def->key_type_id;
2840
map->btf_value_type_id = def->value_type_id;
2841
2842
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2843
if (map_is_ringbuf(map))
2844
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2845
2846
if (def->parts & MAP_DEF_MAP_TYPE)
2847
pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2848
2849
if (def->parts & MAP_DEF_KEY_TYPE)
2850
pr_debug("map '%s': found key [%u], sz = %u.\n",
2851
map->name, def->key_type_id, def->key_size);
2852
else if (def->parts & MAP_DEF_KEY_SIZE)
2853
pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2854
2855
if (def->parts & MAP_DEF_VALUE_TYPE)
2856
pr_debug("map '%s': found value [%u], sz = %u.\n",
2857
map->name, def->value_type_id, def->value_size);
2858
else if (def->parts & MAP_DEF_VALUE_SIZE)
2859
pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2860
2861
if (def->parts & MAP_DEF_MAX_ENTRIES)
2862
pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2863
if (def->parts & MAP_DEF_MAP_FLAGS)
2864
pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2865
if (def->parts & MAP_DEF_MAP_EXTRA)
2866
pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2867
(unsigned long long)def->map_extra);
2868
if (def->parts & MAP_DEF_PINNING)
2869
pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2870
if (def->parts & MAP_DEF_NUMA_NODE)
2871
pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2872
2873
if (def->parts & MAP_DEF_INNER_MAP)
2874
pr_debug("map '%s': found inner map definition.\n", map->name);
2875
}
2876
2877
static const char *btf_var_linkage_str(__u32 linkage)
2878
{
2879
switch (linkage) {
2880
case BTF_VAR_STATIC: return "static";
2881
case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2882
case BTF_VAR_GLOBAL_EXTERN: return "extern";
2883
default: return "unknown";
2884
}
2885
}
2886
2887
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2888
const struct btf_type *sec,
2889
int var_idx, int sec_idx,
2890
const Elf_Data *data, bool strict,
2891
const char *pin_root_path)
2892
{
2893
struct btf_map_def map_def = {}, inner_def = {};
2894
const struct btf_type *var, *def;
2895
const struct btf_var_secinfo *vi;
2896
const struct btf_var *var_extra;
2897
const char *map_name;
2898
struct bpf_map *map;
2899
int err;
2900
2901
vi = btf_var_secinfos(sec) + var_idx;
2902
var = btf__type_by_id(obj->btf, vi->type);
2903
var_extra = btf_var(var);
2904
map_name = btf__name_by_offset(obj->btf, var->name_off);
2905
2906
if (map_name == NULL || map_name[0] == '\0') {
2907
pr_warn("map #%d: empty name.\n", var_idx);
2908
return -EINVAL;
2909
}
2910
if ((__u64)vi->offset + vi->size > data->d_size) {
2911
pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2912
return -EINVAL;
2913
}
2914
if (!btf_is_var(var)) {
2915
pr_warn("map '%s': unexpected var kind %s.\n",
2916
map_name, btf_kind_str(var));
2917
return -EINVAL;
2918
}
2919
if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2920
pr_warn("map '%s': unsupported map linkage %s.\n",
2921
map_name, btf_var_linkage_str(var_extra->linkage));
2922
return -EOPNOTSUPP;
2923
}
2924
2925
def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2926
if (!btf_is_struct(def)) {
2927
pr_warn("map '%s': unexpected def kind %s.\n",
2928
map_name, btf_kind_str(def));
2929
return -EINVAL;
2930
}
2931
if (def->size > vi->size) {
2932
pr_warn("map '%s': invalid def size.\n", map_name);
2933
return -EINVAL;
2934
}
2935
2936
map = bpf_object__add_map(obj);
2937
if (IS_ERR(map))
2938
return PTR_ERR(map);
2939
map->name = strdup(map_name);
2940
if (!map->name) {
2941
pr_warn("map '%s': failed to alloc map name.\n", map_name);
2942
return -ENOMEM;
2943
}
2944
map->libbpf_type = LIBBPF_MAP_UNSPEC;
2945
map->def.type = BPF_MAP_TYPE_UNSPEC;
2946
map->sec_idx = sec_idx;
2947
map->sec_offset = vi->offset;
2948
map->btf_var_idx = var_idx;
2949
pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2950
map_name, map->sec_idx, map->sec_offset);
2951
2952
err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2953
if (err)
2954
return err;
2955
2956
fill_map_from_def(map, &map_def);
2957
2958
if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2959
err = build_map_pin_path(map, pin_root_path);
2960
if (err) {
2961
pr_warn("map '%s': couldn't build pin path.\n", map->name);
2962
return err;
2963
}
2964
}
2965
2966
if (map_def.parts & MAP_DEF_INNER_MAP) {
2967
map->inner_map = calloc(1, sizeof(*map->inner_map));
2968
if (!map->inner_map)
2969
return -ENOMEM;
2970
map->inner_map->fd = create_placeholder_fd();
2971
if (map->inner_map->fd < 0)
2972
return map->inner_map->fd;
2973
map->inner_map->sec_idx = sec_idx;
2974
map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2975
if (!map->inner_map->name)
2976
return -ENOMEM;
2977
sprintf(map->inner_map->name, "%s.inner", map_name);
2978
2979
fill_map_from_def(map->inner_map, &inner_def);
2980
}
2981
2982
err = map_fill_btf_type_info(obj, map);
2983
if (err)
2984
return err;
2985
2986
return 0;
2987
}
2988
2989
static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
2990
const char *sec_name, int sec_idx,
2991
void *data, size_t data_sz)
2992
{
2993
const long page_sz = sysconf(_SC_PAGE_SIZE);
2994
size_t mmap_sz;
2995
2996
mmap_sz = bpf_map_mmap_sz(map);
2997
if (roundup(data_sz, page_sz) > mmap_sz) {
2998
pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
2999
sec_name, mmap_sz, data_sz);
3000
return -E2BIG;
3001
}
3002
3003
obj->arena_data = malloc(data_sz);
3004
if (!obj->arena_data)
3005
return -ENOMEM;
3006
memcpy(obj->arena_data, data, data_sz);
3007
obj->arena_data_sz = data_sz;
3008
3009
/* make bpf_map__init_value() work for ARENA maps */
3010
map->mmaped = obj->arena_data;
3011
3012
return 0;
3013
}
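/* Illustrative BPF-side setup that ends up here (a sketch assuming
* bpf_helpers.h and an __arena address-space attribute; exact section and
* attribute names are not defined in this file):
*
*   struct {
*       __uint(type, BPF_MAP_TYPE_ARENA);
*       __uint(map_flags, BPF_F_MMAPABLE);
*       __uint(max_entries, 16);    - max_entries is in pages for ARENA
*   } arena SEC(".maps");
*
*   int __arena arena_global;       - placed in the arena data section
*
* The arena data section's initial contents are stashed in obj->arena_data
* above and applied once the ARENA map is actually created.
*/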
3014
3015
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
3016
const char *pin_root_path)
3017
{
3018
const struct btf_type *sec = NULL;
3019
int nr_types, i, vlen, err;
3020
const struct btf_type *t;
3021
const char *name;
3022
Elf_Data *data;
3023
Elf_Scn *scn;
3024
3025
if (obj->efile.btf_maps_shndx < 0)
3026
return 0;
3027
3028
scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
3029
data = elf_sec_data(obj, scn);
3030
if (!data) {
3031
pr_warn("elf: failed to get %s map definitions for %s\n",
3032
MAPS_ELF_SEC, obj->path);
3033
return -EINVAL;
3034
}
3035
3036
nr_types = btf__type_cnt(obj->btf);
3037
for (i = 1; i < nr_types; i++) {
3038
t = btf__type_by_id(obj->btf, i);
3039
if (!btf_is_datasec(t))
3040
continue;
3041
name = btf__name_by_offset(obj->btf, t->name_off);
3042
if (strcmp(name, MAPS_ELF_SEC) == 0) {
3043
sec = t;
3044
obj->efile.btf_maps_sec_btf_id = i;
3045
break;
3046
}
3047
}
3048
3049
if (!sec) {
3050
pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
3051
return -ENOENT;
3052
}
3053
3054
vlen = btf_vlen(sec);
3055
for (i = 0; i < vlen; i++) {
3056
err = bpf_object__init_user_btf_map(obj, sec, i,
3057
obj->efile.btf_maps_shndx,
3058
data, strict,
3059
pin_root_path);
3060
if (err)
3061
return err;
3062
}
3063
3064
for (i = 0; i < obj->nr_maps; i++) {
3065
struct bpf_map *map = &obj->maps[i];
3066
3067
if (map->def.type != BPF_MAP_TYPE_ARENA)
3068
continue;
3069
3070
if (obj->arena_map_idx >= 0) {
3071
pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
3072
map->name, obj->maps[obj->arena_map_idx].name);
3073
return -EINVAL;
3074
}
3075
obj->arena_map_idx = i;
3076
3077
if (obj->efile.arena_data) {
3078
err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
3079
obj->efile.arena_data->d_buf,
3080
obj->efile.arena_data->d_size);
3081
if (err)
3082
return err;
3083
}
3084
}
3085
if (obj->efile.arena_data && obj->arena_map_idx < 0) {
3086
pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
3087
ARENA_SEC);
3088
return -ENOENT;
3089
}
3090
3091
return 0;
3092
}
3093
3094
static int bpf_object__init_maps(struct bpf_object *obj,
3095
const struct bpf_object_open_opts *opts)
3096
{
3097
const char *pin_root_path;
3098
bool strict;
3099
int err = 0;
3100
3101
strict = !OPTS_GET(opts, relaxed_maps, false);
3102
pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
3103
3104
err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
3105
err = err ?: bpf_object__init_global_data_maps(obj);
3106
err = err ?: bpf_object__init_kconfig_map(obj);
3107
err = err ?: bpf_object_init_struct_ops(obj);
3108
3109
return err;
3110
}
3111
3112
static bool section_have_execinstr(struct bpf_object *obj, int idx)
3113
{
3114
Elf64_Shdr *sh;
3115
3116
sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
3117
if (!sh)
3118
return false;
3119
3120
return sh->sh_flags & SHF_EXECINSTR;
3121
}
3122
3123
static bool starts_with_qmark(const char *s)
3124
{
3125
return s && s[0] == '?';
3126
}
3127
3128
static bool btf_needs_sanitization(struct bpf_object *obj)
3129
{
3130
bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3131
bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3132
bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3133
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3134
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3135
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3136
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3137
bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3138
3139
return !has_func || !has_datasec || !has_func_global || !has_float ||
3140
!has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
3141
}
3142
3143
static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
3144
{
3145
bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3146
bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3147
bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3148
bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3149
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3150
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3151
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3152
bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3153
int enum64_placeholder_id = 0;
3154
struct btf_type *t;
3155
int i, j, vlen;
3156
3157
for (i = 1; i < btf__type_cnt(btf); i++) {
3158
t = (struct btf_type *)btf__type_by_id(btf, i);
3159
3160
if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
3161
/* replace VAR/DECL_TAG with INT */
3162
t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
3163
/*
3164
* using size = 1 is the safest choice, 4 will be too
3165
* big and cause kernel BTF validation failure if
3166
* original variable took less than 4 bytes
3167
*/
3168
t->size = 1;
3169
*(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
3170
} else if (!has_datasec && btf_is_datasec(t)) {
3171
/* replace DATASEC with STRUCT */
3172
const struct btf_var_secinfo *v = btf_var_secinfos(t);
3173
struct btf_member *m = btf_members(t);
3174
struct btf_type *vt;
3175
char *name;
3176
3177
name = (char *)btf__name_by_offset(btf, t->name_off);
3178
while (*name) {
3179
if (*name == '.' || *name == '?')
3180
*name = '_';
3181
name++;
3182
}
3183
3184
vlen = btf_vlen(t);
3185
t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
3186
for (j = 0; j < vlen; j++, v++, m++) {
3187
/* order of field assignments is important */
3188
m->offset = v->offset * 8;
3189
m->type = v->type;
3190
/* preserve variable name as member name */
3191
vt = (void *)btf__type_by_id(btf, v->type);
3192
m->name_off = vt->name_off;
3193
}
3194
} else if (!has_qmark_datasec && btf_is_datasec(t) &&
3195
starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
3196
/* replace '?' prefix with '_' for DATASEC names */
3197
char *name;
3198
3199
name = (char *)btf__name_by_offset(btf, t->name_off);
3200
if (name[0] == '?')
3201
name[0] = '_';
3202
} else if (!has_func && btf_is_func_proto(t)) {
3203
/* replace FUNC_PROTO with ENUM */
3204
vlen = btf_vlen(t);
3205
t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
3206
t->size = sizeof(__u32); /* kernel enforced */
3207
} else if (!has_func && btf_is_func(t)) {
3208
/* replace FUNC with TYPEDEF */
3209
t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
3210
} else if (!has_func_global && btf_is_func(t)) {
3211
/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
3212
t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
3213
} else if (!has_float && btf_is_float(t)) {
3214
/* replace FLOAT with an equally-sized empty STRUCT;
3215
* since C compilers do not accept e.g. "float" as a
3216
* valid struct name, make it anonymous
3217
*/
3218
t->name_off = 0;
3219
t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
3220
} else if (!has_type_tag && btf_is_type_tag(t)) {
3221
/* replace TYPE_TAG with a CONST */
3222
t->name_off = 0;
3223
t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
3224
} else if (!has_enum64 && btf_is_enum(t)) {
3225
/* clear the kflag */
3226
t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
3227
} else if (!has_enum64 && btf_is_enum64(t)) {
3228
/* replace ENUM64 with a union */
3229
struct btf_member *m;
3230
3231
if (enum64_placeholder_id == 0) {
3232
enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
3233
if (enum64_placeholder_id < 0)
3234
return enum64_placeholder_id;
3235
3236
t = (struct btf_type *)btf__type_by_id(btf, i);
3237
}
3238
3239
m = btf_members(t);
3240
vlen = btf_vlen(t);
3241
t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
3242
for (j = 0; j < vlen; j++, m++) {
3243
m->type = enum64_placeholder_id;
3244
m->offset = 0;
3245
}
3246
}
3247
}
3248
3249
return 0;
3250
}
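/* Illustrative effect of the sanitization above: on a kernel without
* DATASEC/VAR support, a '.data' DATASEC holding VARs 'a' and 'b' is
* rewritten into a STRUCT named '_data' whose members are 'a' and 'b'
* (byte offsets converted to bit offsets), while each VAR type itself is
* turned into a 1-byte INT placeholder.
*/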
3251
3252
static bool libbpf_needs_btf(const struct bpf_object *obj)
3253
{
3254
return obj->efile.btf_maps_shndx >= 0 ||
3255
obj->efile.has_st_ops ||
3256
obj->nr_extern > 0;
3257
}
3258
3259
static bool kernel_needs_btf(const struct bpf_object *obj)
3260
{
3261
return obj->efile.has_st_ops;
3262
}
3263
3264
static int bpf_object__init_btf(struct bpf_object *obj,
3265
Elf_Data *btf_data,
3266
Elf_Data *btf_ext_data)
3267
{
3268
int err = -ENOENT;
3269
3270
if (btf_data) {
3271
obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
3272
err = libbpf_get_error(obj->btf);
3273
if (err) {
3274
obj->btf = NULL;
3275
pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err));
3276
goto out;
3277
}
3278
/* enforce 8-byte pointers for BPF-targeted BTFs */
3279
btf__set_pointer_size(obj->btf, 8);
3280
}
3281
if (btf_ext_data) {
3282
struct btf_ext_info *ext_segs[3];
3283
int seg_num, sec_num;
3284
3285
if (!obj->btf) {
3286
pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
3287
BTF_EXT_ELF_SEC, BTF_ELF_SEC);
3288
goto out;
3289
}
3290
obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
3291
err = libbpf_get_error(obj->btf_ext);
3292
if (err) {
3293
pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n",
3294
BTF_EXT_ELF_SEC, errstr(err));
3295
obj->btf_ext = NULL;
3296
goto out;
3297
}
3298
3299
/* setup .BTF.ext to ELF section mapping */
3300
ext_segs[0] = &obj->btf_ext->func_info;
3301
ext_segs[1] = &obj->btf_ext->line_info;
3302
ext_segs[2] = &obj->btf_ext->core_relo_info;
3303
for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
3304
struct btf_ext_info *seg = ext_segs[seg_num];
3305
const struct btf_ext_info_sec *sec;
3306
const char *sec_name;
3307
Elf_Scn *scn;
3308
3309
if (seg->sec_cnt == 0)
3310
continue;
3311
3312
seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
3313
if (!seg->sec_idxs) {
3314
err = -ENOMEM;
3315
goto out;
3316
}
3317
3318
sec_num = 0;
3319
for_each_btf_ext_sec(seg, sec) {
3320
/* preventively increment index to avoid doing
3321
* this before every continue below
3322
*/
3323
sec_num++;
3324
3325
sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
3326
if (str_is_empty(sec_name))
3327
continue;
3328
scn = elf_sec_by_name(obj, sec_name);
3329
if (!scn)
3330
continue;
3331
3332
seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
3333
}
3334
}
3335
}
3336
out:
3337
if (err && libbpf_needs_btf(obj)) {
3338
pr_warn("BTF is required, but is missing or corrupted.\n");
3339
return err;
3340
}
3341
return 0;
3342
}
3343
3344
static int compare_vsi_off(const void *_a, const void *_b)
3345
{
3346
const struct btf_var_secinfo *a = _a;
3347
const struct btf_var_secinfo *b = _b;
3348
3349
return a->offset - b->offset;
3350
}
3351
3352
static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
3353
struct btf_type *t)
3354
{
3355
__u32 size = 0, i, vars = btf_vlen(t);
3356
const char *sec_name = btf__name_by_offset(btf, t->name_off);
3357
struct btf_var_secinfo *vsi;
3358
bool fixup_offsets = false;
3359
int err;
3360
3361
if (!sec_name) {
3362
pr_debug("No name found in string section for DATASEC kind.\n");
3363
return -ENOENT;
3364
}
3365
3366
/* Extern-backing datasecs (.ksyms, .kconfig) have their size and
3367
* variable offsets set at the previous step. Further, not every
3368
* extern BTF VAR has corresponding ELF symbol preserved, so we skip
3369
* all fixups altogether for such sections and go straight to sorting
3370
* VARs within their DATASEC.
3371
*/
3372
if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
3373
goto sort_vars;
3374
3375
/* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
3376
* fix this up. But BPF static linker already fixes this up and fills
3377
* all the sizes and offsets during static linking. So this step has
3378
* to be optional. But the STV_HIDDEN handling is non-optional for any
3379
* non-extern DATASEC, so the variable fixup loop below handles both
3380
* functions at the same time, paying the cost of BTF VAR <-> ELF
3381
* symbol matching just once.
3382
*/
3383
if (t->size == 0) {
3384
err = find_elf_sec_sz(obj, sec_name, &size);
3385
if (err || !size) {
3386
pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n",
3387
sec_name, size, errstr(err));
3388
return -ENOENT;
3389
}
3390
3391
t->size = size;
3392
fixup_offsets = true;
3393
}
3394
3395
for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
3396
const struct btf_type *t_var;
3397
struct btf_var *var;
3398
const char *var_name;
3399
Elf64_Sym *sym;
3400
3401
t_var = btf__type_by_id(btf, vsi->type);
3402
if (!t_var || !btf_is_var(t_var)) {
3403
pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
3404
return -EINVAL;
3405
}
3406
3407
var = btf_var(t_var);
3408
if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
3409
continue;
3410
3411
var_name = btf__name_by_offset(btf, t_var->name_off);
3412
if (!var_name) {
3413
pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
3414
sec_name, i);
3415
return -ENOENT;
3416
}
3417
3418
sym = find_elf_var_sym(obj, var_name);
3419
if (IS_ERR(sym)) {
3420
pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
3421
sec_name, var_name);
3422
return -ENOENT;
3423
}
3424
3425
if (fixup_offsets)
3426
vsi->offset = sym->st_value;
3427
3428
/* if variable is a global/weak symbol, but has restricted
3429
* (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
3430
* as static. This follows similar logic for functions (BPF
3431
* subprogs) and influences libbpf's further decisions about
3432
* whether to make global data BPF array maps as
3433
* BPF_F_MMAPABLE.
3434
*/
3435
if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
3436
|| ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
3437
var->linkage = BTF_VAR_STATIC;
3438
}
3439
3440
sort_vars:
3441
qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
3442
return 0;
3443
}
3444
3445
static int bpf_object_fixup_btf(struct bpf_object *obj)
3446
{
3447
int i, n, err = 0;
3448
3449
if (!obj->btf)
3450
return 0;
3451
3452
n = btf__type_cnt(obj->btf);
3453
for (i = 1; i < n; i++) {
3454
struct btf_type *t = btf_type_by_id(obj->btf, i);
3455
3456
/* Loader needs to fix up some of the things compiler
3457
* couldn't get its hands on while emitting BTF. This
3458
* is section size and global variable offset. We use
3459
* the info from the ELF itself for this purpose.
3460
*/
3461
if (btf_is_datasec(t)) {
3462
err = btf_fixup_datasec(obj, obj->btf, t);
3463
if (err)
3464
return err;
3465
}
3466
}
3467
3468
return 0;
3469
}
3470
3471
static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
3472
{
3473
if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3474
prog->type == BPF_PROG_TYPE_LSM)
3475
return true;
3476
3477
/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3478
* also need vmlinux BTF
3479
*/
3480
if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3481
return true;
3482
3483
return false;
3484
}
3485
3486
static bool map_needs_vmlinux_btf(struct bpf_map *map)
3487
{
3488
return bpf_map__is_struct_ops(map);
3489
}
3490
3491
static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3492
{
3493
struct bpf_program *prog;
3494
struct bpf_map *map;
3495
int i;
3496
3497
/* CO-RE relocations need kernel BTF, only when btf_custom_path
3498
* is not specified
3499
*/
3500
if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3501
return true;
3502
3503
/* Support for typed ksyms needs kernel BTF */
3504
for (i = 0; i < obj->nr_extern; i++) {
3505
const struct extern_desc *ext;
3506
3507
ext = &obj->externs[i];
3508
if (ext->type == EXT_KSYM && ext->ksym.type_id)
3509
return true;
3510
}
3511
3512
bpf_object__for_each_program(prog, obj) {
3513
if (!prog->autoload)
3514
continue;
3515
if (prog_needs_vmlinux_btf(prog))
3516
return true;
3517
}
3518
3519
bpf_object__for_each_map(map, obj) {
3520
if (map_needs_vmlinux_btf(map))
3521
return true;
3522
}
3523
3524
return false;
3525
}
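/* Illustrative BPF-side declaration that makes vmlinux BTF necessary
* (assumes the __ksym macro from bpf_helpers.h; the symbol is just a
* commonly used example):
*
*   extern const struct rq runqueues __ksym;
*
* Such a typed ksym extern has ext->ksym.type_id set, so the loop above
* reports that kernel BTF must be loaded to resolve its type.
*/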
3526
3527
static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3528
{
3529
int err;
3530
3531
/* btf_vmlinux could be loaded earlier */
3532
if (obj->btf_vmlinux || obj->gen_loader)
3533
return 0;
3534
3535
if (!force && !obj_needs_vmlinux_btf(obj))
3536
return 0;
3537
3538
obj->btf_vmlinux = btf__load_vmlinux_btf();
3539
err = libbpf_get_error(obj->btf_vmlinux);
3540
if (err) {
3541
pr_warn("Error loading vmlinux BTF: %s\n", errstr(err));
3542
obj->btf_vmlinux = NULL;
3543
return err;
3544
}
3545
return 0;
3546
}
3547
3548
static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3549
{
3550
struct btf *kern_btf = obj->btf;
3551
bool btf_mandatory, sanitize;
3552
int i, err = 0;
3553
3554
if (!obj->btf)
3555
return 0;
3556
3557
if (!kernel_supports(obj, FEAT_BTF)) {
3558
if (kernel_needs_btf(obj)) {
3559
err = -EOPNOTSUPP;
3560
goto report;
3561
}
3562
pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
3563
return 0;
3564
}
3565
3566
/* Even though some subprogs are global/weak, user might prefer more
3567
* permissive BPF verification process that BPF verifier performs for
3568
* static functions, taking into account more context from the caller
3569
* functions. In such case, they need to mark such subprogs with
3570
* __attribute__((visibility("hidden"))) and libbpf will adjust
3571
* corresponding FUNC BTF type to be marked as static and trigger more
3572
* involved BPF verification process.
3573
*/
3574
for (i = 0; i < obj->nr_programs; i++) {
3575
struct bpf_program *prog = &obj->programs[i];
3576
struct btf_type *t;
3577
const char *name;
3578
int j, n;
3579
3580
if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3581
continue;
3582
3583
n = btf__type_cnt(obj->btf);
3584
for (j = 1; j < n; j++) {
3585
t = btf_type_by_id(obj->btf, j);
3586
if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3587
continue;
3588
3589
name = btf__str_by_offset(obj->btf, t->name_off);
3590
if (strcmp(name, prog->name) != 0)
3591
continue;
3592
3593
t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3594
break;
3595
}
3596
}
3597
3598
sanitize = btf_needs_sanitization(obj);
3599
if (sanitize) {
3600
const void *raw_data;
3601
__u32 sz;
3602
3603
/* clone BTF to sanitize a copy and leave the original intact */
3604
raw_data = btf__raw_data(obj->btf, &sz);
3605
kern_btf = btf__new(raw_data, sz);
3606
err = libbpf_get_error(kern_btf);
3607
if (err)
3608
return err;
3609
3610
/* enforce 8-byte pointers for BPF-targeted BTFs */
3611
btf__set_pointer_size(obj->btf, 8);
3612
err = bpf_object__sanitize_btf(obj, kern_btf);
3613
if (err)
3614
return err;
3615
}
3616
3617
if (obj->gen_loader) {
3618
__u32 raw_size = 0;
3619
const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3620
3621
if (!raw_data)
3622
return -ENOMEM;
3623
bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3624
/* Pretend to have valid FD to pass various fd >= 0 checks.
3625
* This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3626
*/
3627
btf__set_fd(kern_btf, 0);
3628
} else {
3629
/* currently BPF_BTF_LOAD only supports log_level 1 */
3630
err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3631
obj->log_level ? 1 : 0, obj->token_fd);
3632
}
3633
if (sanitize) {
3634
if (!err) {
3635
/* move fd to libbpf's BTF */
3636
btf__set_fd(obj->btf, btf__fd(kern_btf));
3637
btf__set_fd(kern_btf, -1);
3638
}
3639
btf__free(kern_btf);
3640
}
3641
report:
3642
if (err) {
3643
btf_mandatory = kernel_needs_btf(obj);
3644
if (btf_mandatory) {
3645
pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n",
3646
errstr(err));
3647
} else {
3648
pr_info("Error loading .BTF into kernel: %s. BTF is optional, ignoring.\n",
3649
errstr(err));
3650
err = 0;
3651
}
3652
}
3653
return err;
3654
}
3655
3656
static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3657
{
3658
const char *name;
3659
3660
name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3661
if (!name) {
3662
pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3663
off, obj->path, elf_errmsg(-1));
3664
return NULL;
3665
}
3666
3667
return name;
3668
}
3669
3670
static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3671
{
3672
const char *name;
3673
3674
name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3675
if (!name) {
3676
pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3677
off, obj->path, elf_errmsg(-1));
3678
return NULL;
3679
}
3680
3681
return name;
3682
}
3683
3684
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3685
{
3686
Elf_Scn *scn;
3687
3688
scn = elf_getscn(obj->efile.elf, idx);
3689
if (!scn) {
3690
pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3691
idx, obj->path, elf_errmsg(-1));
3692
return NULL;
3693
}
3694
return scn;
3695
}
3696
3697
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3698
{
3699
Elf_Scn *scn = NULL;
3700
Elf *elf = obj->efile.elf;
3701
const char *sec_name;
3702
3703
while ((scn = elf_nextscn(elf, scn)) != NULL) {
3704
sec_name = elf_sec_name(obj, scn);
3705
if (!sec_name)
3706
return NULL;
3707
3708
if (strcmp(sec_name, name) != 0)
3709
continue;
3710
3711
return scn;
3712
}
3713
return NULL;
3714
}
3715
3716
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3717
{
3718
Elf64_Shdr *shdr;
3719
3720
if (!scn)
3721
return NULL;
3722
3723
shdr = elf64_getshdr(scn);
3724
if (!shdr) {
3725
pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3726
elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3727
return NULL;
3728
}
3729
3730
return shdr;
3731
}
3732
3733
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3734
{
3735
const char *name;
3736
Elf64_Shdr *sh;
3737
3738
if (!scn)
3739
return NULL;
3740
3741
sh = elf_sec_hdr(obj, scn);
3742
if (!sh)
3743
return NULL;
3744
3745
name = elf_sec_str(obj, sh->sh_name);
3746
if (!name) {
3747
pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3748
elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3749
return NULL;
3750
}
3751
3752
return name;
3753
}
3754
3755
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3756
{
3757
Elf_Data *data;
3758
3759
if (!scn)
3760
return NULL;
3761
3762
data = elf_getdata(scn, 0);
3763
if (!data) {
3764
pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3765
elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3766
obj->path, elf_errmsg(-1));
3767
return NULL;
3768
}
3769
3770
return data;
3771
}
3772
3773
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3774
{
3775
if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3776
return NULL;
3777
3778
return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3779
}
3780
3781
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3782
{
3783
if (idx >= data->d_size / sizeof(Elf64_Rel))
3784
return NULL;
3785
3786
return (Elf64_Rel *)data->d_buf + idx;
3787
}
3788
3789
static bool is_sec_name_dwarf(const char *name)
3790
{
3791
/* approximation, but the actual list is too long */
3792
return str_has_pfx(name, ".debug_");
3793
}
3794
3795
static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3796
{
3797
/* no special handling of .strtab */
3798
if (hdr->sh_type == SHT_STRTAB)
3799
return true;
3800
3801
/* ignore .llvm_addrsig section as well */
3802
if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3803
return true;
3804
3805
/* no subprograms will lead to an empty .text section, ignore it */
3806
if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3807
strcmp(name, ".text") == 0)
3808
return true;
3809
3810
/* DWARF sections */
3811
if (is_sec_name_dwarf(name))
3812
return true;
3813
3814
if (str_has_pfx(name, ".rel")) {
3815
name += sizeof(".rel") - 1;
3816
/* DWARF section relocations */
3817
if (is_sec_name_dwarf(name))
3818
return true;
3819
3820
/* .BTF and .BTF.ext don't need relocations */
3821
if (strcmp(name, BTF_ELF_SEC) == 0 ||
3822
strcmp(name, BTF_EXT_ELF_SEC) == 0)
3823
return true;
3824
}
3825
3826
return false;
3827
}
3828
3829
static int cmp_progs(const void *_a, const void *_b)
3830
{
3831
const struct bpf_program *a = _a;
3832
const struct bpf_program *b = _b;
3833
3834
if (a->sec_idx != b->sec_idx)
3835
return a->sec_idx < b->sec_idx ? -1 : 1;
3836
3837
/* sec_insn_off can't be the same within the section */
3838
return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3839
}
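
/* Note: this comparator establishes a total order by (sec_idx, sec_insn_off);
* bpf_object__elf_collect() below sorts obj->programs with it, and
* find_prog_by_sec_insn() relies on that order for its binary search.
*/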
3840
3841
static int bpf_object__elf_collect(struct bpf_object *obj)
3842
{
3843
struct elf_sec_desc *sec_desc;
3844
Elf *elf = obj->efile.elf;
3845
Elf_Data *btf_ext_data = NULL;
3846
Elf_Data *btf_data = NULL;
3847
int idx = 0, err = 0;
3848
const char *name;
3849
Elf_Data *data;
3850
Elf_Scn *scn;
3851
Elf64_Shdr *sh;
3852
3853
/* ELF section indices are 0-based, but sec #0 is a special "invalid"
* section. Since the section count retrieved by elf_getshdrnum()
* includes sec #0, it is already the necessary array size to keep
* all the sections.
*/
3858
if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3859
pr_warn("elf: failed to get the number of sections for %s: %s\n",
3860
obj->path, elf_errmsg(-1));
3861
return -LIBBPF_ERRNO__FORMAT;
3862
}
3863
obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3864
if (!obj->efile.secs)
3865
return -ENOMEM;
3866
3867
/* a bunch of ELF parsing functionality depends on processing symbols,
3868
* so do the first pass and find the symbol table
3869
*/
3870
scn = NULL;
3871
while ((scn = elf_nextscn(elf, scn)) != NULL) {
3872
sh = elf_sec_hdr(obj, scn);
3873
if (!sh)
3874
return -LIBBPF_ERRNO__FORMAT;
3875
3876
if (sh->sh_type == SHT_SYMTAB) {
3877
if (obj->efile.symbols) {
3878
pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3879
return -LIBBPF_ERRNO__FORMAT;
3880
}
3881
3882
data = elf_sec_data(obj, scn);
3883
if (!data)
3884
return -LIBBPF_ERRNO__FORMAT;
3885
3886
idx = elf_ndxscn(scn);
3887
3888
obj->efile.symbols = data;
3889
obj->efile.symbols_shndx = idx;
3890
obj->efile.strtabidx = sh->sh_link;
3891
}
3892
}
3893
3894
if (!obj->efile.symbols) {
3895
pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3896
obj->path);
3897
return -ENOENT;
3898
}
3899
3900
scn = NULL;
3901
while ((scn = elf_nextscn(elf, scn)) != NULL) {
3902
idx = elf_ndxscn(scn);
3903
sec_desc = &obj->efile.secs[idx];
3904
3905
sh = elf_sec_hdr(obj, scn);
3906
if (!sh)
3907
return -LIBBPF_ERRNO__FORMAT;
3908
3909
name = elf_sec_str(obj, sh->sh_name);
3910
if (!name)
3911
return -LIBBPF_ERRNO__FORMAT;
3912
3913
if (ignore_elf_section(sh, name))
3914
continue;
3915
3916
data = elf_sec_data(obj, scn);
3917
if (!data)
3918
return -LIBBPF_ERRNO__FORMAT;
3919
3920
pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3921
idx, name, (unsigned long)data->d_size,
3922
(int)sh->sh_link, (unsigned long)sh->sh_flags,
3923
(int)sh->sh_type);
3924
3925
if (strcmp(name, "license") == 0) {
3926
err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3927
if (err)
3928
return err;
3929
} else if (strcmp(name, "version") == 0) {
3930
err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3931
if (err)
3932
return err;
3933
} else if (strcmp(name, "maps") == 0) {
3934
pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3935
return -ENOTSUP;
3936
} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3937
obj->efile.btf_maps_shndx = idx;
3938
} else if (strcmp(name, BTF_ELF_SEC) == 0) {
3939
if (sh->sh_type != SHT_PROGBITS)
3940
return -LIBBPF_ERRNO__FORMAT;
3941
btf_data = data;
3942
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3943
if (sh->sh_type != SHT_PROGBITS)
3944
return -LIBBPF_ERRNO__FORMAT;
3945
btf_ext_data = data;
3946
} else if (sh->sh_type == SHT_SYMTAB) {
3947
/* already processed during the first pass above */
3948
} else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3949
if (sh->sh_flags & SHF_EXECINSTR) {
3950
if (strcmp(name, ".text") == 0)
3951
obj->efile.text_shndx = idx;
3952
err = bpf_object__add_programs(obj, data, name, idx);
3953
if (err)
3954
return err;
3955
} else if (strcmp(name, DATA_SEC) == 0 ||
3956
str_has_pfx(name, DATA_SEC ".")) {
3957
sec_desc->sec_type = SEC_DATA;
3958
sec_desc->shdr = sh;
3959
sec_desc->data = data;
3960
} else if (strcmp(name, RODATA_SEC) == 0 ||
3961
str_has_pfx(name, RODATA_SEC ".")) {
3962
sec_desc->sec_type = SEC_RODATA;
3963
sec_desc->shdr = sh;
3964
sec_desc->data = data;
3965
} else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
3966
strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
3967
strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
3968
strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
3969
sec_desc->sec_type = SEC_ST_OPS;
3970
sec_desc->shdr = sh;
3971
sec_desc->data = data;
3972
obj->efile.has_st_ops = true;
3973
} else if (strcmp(name, ARENA_SEC) == 0) {
3974
obj->efile.arena_data = data;
3975
obj->efile.arena_data_shndx = idx;
3976
} else if (strcmp(name, JUMPTABLES_SEC) == 0) {
3977
obj->jumptables_data = malloc(data->d_size);
3978
if (!obj->jumptables_data)
3979
return -ENOMEM;
3980
memcpy(obj->jumptables_data, data->d_buf, data->d_size);
3981
obj->jumptables_data_sz = data->d_size;
3982
obj->efile.jumptables_data_shndx = idx;
3983
} else {
3984
pr_info("elf: skipping unrecognized data section(%d) %s\n",
3985
idx, name);
3986
}
3987
} else if (sh->sh_type == SHT_REL) {
3988
int targ_sec_idx = sh->sh_info; /* points to other section */
3989
3990
if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3991
targ_sec_idx >= obj->efile.sec_cnt)
3992
return -LIBBPF_ERRNO__FORMAT;
3993
3994
/* Only do relo for section with exec instructions */
3995
if (!section_have_execinstr(obj, targ_sec_idx) &&
3996
strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3997
strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3998
strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
3999
strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
4000
strcmp(name, ".rel" MAPS_ELF_SEC)) {
4001
pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
4002
idx, name, targ_sec_idx,
4003
elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
4004
continue;
4005
}
4006
4007
sec_desc->sec_type = SEC_RELO;
4008
sec_desc->shdr = sh;
4009
sec_desc->data = data;
4010
} else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
4011
str_has_pfx(name, BSS_SEC "."))) {
4012
sec_desc->sec_type = SEC_BSS;
4013
sec_desc->shdr = sh;
4014
sec_desc->data = data;
4015
} else {
4016
pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
4017
(size_t)sh->sh_size);
4018
}
4019
}
4020
4021
if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
4022
pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
4023
return -LIBBPF_ERRNO__FORMAT;
4024
}
4025
4026
/* change BPF program insns to native endianness for introspection */
4027
if (!is_native_endianness(obj))
4028
bpf_object_bswap_progs(obj);
4029
4030
/* sort BPF programs by section name and in-section instruction offset
4031
* for faster search
4032
*/
4033
if (obj->nr_programs)
4034
qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
4035
4036
return bpf_object__init_btf(obj, btf_data, btf_ext_data);
4037
}
4038
4039
static bool sym_is_extern(const Elf64_Sym *sym)
4040
{
4041
int bind = ELF64_ST_BIND(sym->st_info);
4042
/* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
4043
return sym->st_shndx == SHN_UNDEF &&
4044
(bind == STB_GLOBAL || bind == STB_WEAK) &&
4045
ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
4046
}
4047
4048
static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
4049
{
4050
int bind = ELF64_ST_BIND(sym->st_info);
4051
int type = ELF64_ST_TYPE(sym->st_info);
4052
4053
/* in .text section */
4054
if (sym->st_shndx != text_shndx)
4055
return false;
4056
4057
/* local function */
4058
if (bind == STB_LOCAL && type == STT_SECTION)
4059
return true;
4060
4061
/* global function */
4062
return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC;
4063
}
4064
4065
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
4066
{
4067
const struct btf_type *t;
4068
const char *tname;
4069
int i, n;
4070
4071
if (!btf)
4072
return -ESRCH;
4073
4074
n = btf__type_cnt(btf);
4075
for (i = 1; i < n; i++) {
4076
t = btf__type_by_id(btf, i);
4077
4078
if (!btf_is_var(t) && !btf_is_func(t))
4079
continue;
4080
4081
tname = btf__name_by_offset(btf, t->name_off);
4082
if (strcmp(tname, ext_name))
4083
continue;
4084
4085
if (btf_is_var(t) &&
4086
btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
4087
return -EINVAL;
4088
4089
if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
4090
return -EINVAL;
4091
4092
return i;
4093
}
4094
4095
return -ENOENT;
4096
}
4097
4098
static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
4099
const struct btf_var_secinfo *vs;
4100
const struct btf_type *t;
4101
int i, j, n;
4102
4103
if (!btf)
4104
return -ESRCH;
4105
4106
n = btf__type_cnt(btf);
4107
for (i = 1; i < n; i++) {
4108
t = btf__type_by_id(btf, i);
4109
4110
if (!btf_is_datasec(t))
4111
continue;
4112
4113
vs = btf_var_secinfos(t);
4114
for (j = 0; j < btf_vlen(t); j++, vs++) {
4115
if (vs->type == ext_btf_id)
4116
return i;
4117
}
4118
}
4119
4120
return -ENOENT;
4121
}
4122
4123
static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
4124
bool *is_signed)
4125
{
4126
const struct btf_type *t;
4127
const char *name;
4128
4129
t = skip_mods_and_typedefs(btf, id, NULL);
4130
name = btf__name_by_offset(btf, t->name_off);
4131
4132
if (is_signed)
4133
*is_signed = false;
4134
switch (btf_kind(t)) {
4135
case BTF_KIND_INT: {
4136
int enc = btf_int_encoding(t);
4137
4138
if (enc & BTF_INT_BOOL)
4139
return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
4140
if (is_signed)
4141
*is_signed = enc & BTF_INT_SIGNED;
4142
if (t->size == 1)
4143
return KCFG_CHAR;
4144
if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
4145
return KCFG_UNKNOWN;
4146
return KCFG_INT;
4147
}
4148
case BTF_KIND_ENUM:
4149
if (t->size != 4)
4150
return KCFG_UNKNOWN;
4151
if (strcmp(name, "libbpf_tristate"))
4152
return KCFG_UNKNOWN;
4153
return KCFG_TRISTATE;
4154
case BTF_KIND_ENUM64:
4155
if (strcmp(name, "libbpf_tristate"))
4156
return KCFG_UNKNOWN;
4157
return KCFG_TRISTATE;
4158
case BTF_KIND_ARRAY:
4159
if (btf_array(t)->nelems == 0)
4160
return KCFG_UNKNOWN;
4161
if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
4162
return KCFG_UNKNOWN;
4163
return KCFG_CHAR_ARR;
4164
default:
4165
return KCFG_UNKNOWN;
4166
}
4167
}
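
/* Illustrative sketch (not part of libbpf itself; the CONFIG_* names are only
* examples): the kcfg type classes returned above correspond to extern
* declarations of the following shapes on the BPF side, resolved from Kconfig
* at load time:
*
*   extern int CONFIG_HZ __kconfig;                            // KCFG_INT
*   extern enum libbpf_tristate CONFIG_SOME_MODULE __kconfig;  // KCFG_TRISTATE
*   extern char CONFIG_LOCALVERSION[64] __kconfig __weak;      // KCFG_CHAR_ARR
*/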
4168
4169
static int cmp_externs(const void *_a, const void *_b)
4170
{
4171
const struct extern_desc *a = _a;
4172
const struct extern_desc *b = _b;
4173
4174
if (a->type != b->type)
4175
return a->type < b->type ? -1 : 1;
4176
4177
if (a->type == EXT_KCFG) {
4178
/* descending order by alignment requirements */
4179
if (a->kcfg.align != b->kcfg.align)
4180
return a->kcfg.align > b->kcfg.align ? -1 : 1;
4181
/* ascending order by size, within same alignment class */
4182
if (a->kcfg.sz != b->kcfg.sz)
4183
return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
4184
}
4185
4186
/* resolve ties by name */
4187
return strcmp(a->name, b->name);
4188
}
4189
4190
static int find_int_btf_id(const struct btf *btf)
4191
{
4192
const struct btf_type *t;
4193
int i, n;
4194
4195
n = btf__type_cnt(btf);
4196
for (i = 1; i < n; i++) {
4197
t = btf__type_by_id(btf, i);
4198
4199
if (btf_is_int(t) && btf_int_bits(t) == 32)
4200
return i;
4201
}
4202
4203
return 0;
4204
}
4205
4206
static int add_dummy_ksym_var(struct btf *btf)
4207
{
4208
int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
4209
const struct btf_var_secinfo *vs;
4210
const struct btf_type *sec;
4211
4212
if (!btf)
4213
return 0;
4214
4215
sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
4216
BTF_KIND_DATASEC);
4217
if (sec_btf_id < 0)
4218
return 0;
4219
4220
sec = btf__type_by_id(btf, sec_btf_id);
4221
vs = btf_var_secinfos(sec);
4222
for (i = 0; i < btf_vlen(sec); i++, vs++) {
4223
const struct btf_type *vt;
4224
4225
vt = btf__type_by_id(btf, vs->type);
4226
if (btf_is_func(vt))
4227
break;
4228
}
4229
4230
/* No func in ksyms sec. No need to add dummy var. */
4231
if (i == btf_vlen(sec))
4232
return 0;
4233
4234
int_btf_id = find_int_btf_id(btf);
4235
dummy_var_btf_id = btf__add_var(btf,
4236
"dummy_ksym",
4237
BTF_VAR_GLOBAL_ALLOCATED,
4238
int_btf_id);
4239
if (dummy_var_btf_id < 0)
4240
pr_warn("cannot create a dummy_ksym var\n");
4241
4242
return dummy_var_btf_id;
4243
}
4244
4245
static int bpf_object__collect_externs(struct bpf_object *obj)
4246
{
4247
struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
4248
const struct btf_type *t;
4249
struct extern_desc *ext;
4250
int i, n, off, dummy_var_btf_id;
4251
const char *ext_name, *sec_name;
4252
size_t ext_essent_len;
4253
Elf_Scn *scn;
4254
Elf64_Shdr *sh;
4255
4256
if (!obj->efile.symbols)
4257
return 0;
4258
4259
scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
4260
sh = elf_sec_hdr(obj, scn);
4261
if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
4262
return -LIBBPF_ERRNO__FORMAT;
4263
4264
dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
4265
if (dummy_var_btf_id < 0)
4266
return dummy_var_btf_id;
4267
4268
n = sh->sh_size / sh->sh_entsize;
4269
pr_debug("looking for externs among %d symbols...\n", n);
4270
4271
for (i = 0; i < n; i++) {
4272
Elf64_Sym *sym = elf_sym_by_idx(obj, i);
4273
4274
if (!sym)
4275
return -LIBBPF_ERRNO__FORMAT;
4276
if (!sym_is_extern(sym))
4277
continue;
4278
ext_name = elf_sym_str(obj, sym->st_name);
4279
if (!ext_name || !ext_name[0])
4280
continue;
4281
4282
ext = obj->externs;
4283
ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
4284
if (!ext)
4285
return -ENOMEM;
4286
obj->externs = ext;
4287
ext = &ext[obj->nr_extern];
4288
memset(ext, 0, sizeof(*ext));
4289
obj->nr_extern++;
4290
4291
ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
4292
if (ext->btf_id <= 0) {
4293
pr_warn("failed to find BTF for extern '%s': %d\n",
4294
ext_name, ext->btf_id);
4295
return ext->btf_id;
4296
}
4297
t = btf__type_by_id(obj->btf, ext->btf_id);
4298
ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off));
4299
if (!ext->name)
4300
return -ENOMEM;
4301
ext->sym_idx = i;
4302
ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
4303
4304
ext_essent_len = bpf_core_essential_name_len(ext->name);
4305
ext->essent_name = NULL;
4306
if (ext_essent_len != strlen(ext->name)) {
4307
ext->essent_name = strndup(ext->name, ext_essent_len);
4308
if (!ext->essent_name)
4309
return -ENOMEM;
4310
}
4311
4312
ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
4313
if (ext->sec_btf_id <= 0) {
4314
pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
4315
ext_name, ext->btf_id, ext->sec_btf_id);
4316
return ext->sec_btf_id;
4317
}
4318
sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
4319
sec_name = btf__name_by_offset(obj->btf, sec->name_off);
4320
4321
if (strcmp(sec_name, KCONFIG_SEC) == 0) {
4322
if (btf_is_func(t)) {
4323
pr_warn("extern function %s is unsupported under %s section\n",
4324
ext->name, KCONFIG_SEC);
4325
return -ENOTSUP;
4326
}
4327
kcfg_sec = sec;
4328
ext->type = EXT_KCFG;
4329
ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
4330
if (ext->kcfg.sz <= 0) {
4331
pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
4332
ext_name, ext->kcfg.sz);
4333
return ext->kcfg.sz;
4334
}
4335
ext->kcfg.align = btf__align_of(obj->btf, t->type);
4336
if (ext->kcfg.align <= 0) {
4337
pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
4338
ext_name, ext->kcfg.align);
4339
return -EINVAL;
4340
}
4341
ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
4342
&ext->kcfg.is_signed);
4343
if (ext->kcfg.type == KCFG_UNKNOWN) {
4344
pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
4345
return -ENOTSUP;
4346
}
4347
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
4348
ksym_sec = sec;
4349
ext->type = EXT_KSYM;
4350
skip_mods_and_typedefs(obj->btf, t->type,
4351
&ext->ksym.type_id);
4352
} else {
4353
pr_warn("unrecognized extern section '%s'\n", sec_name);
4354
return -ENOTSUP;
4355
}
4356
}
4357
pr_debug("collected %d externs total\n", obj->nr_extern);
4358
4359
if (!obj->nr_extern)
4360
return 0;
4361
4362
/* sort externs by type, for kcfg ones also by (align, size, name) */
4363
qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
4364
4365
/* for .ksyms section, we need to turn all externs into allocated
* variables in BTF to pass kernel verification; we do this by
* pretending that each extern is an 8-byte variable
*/
4369
if (ksym_sec) {
4370
/* find existing 4-byte integer type in BTF to use for fake
4371
* extern variables in DATASEC
4372
*/
4373
int int_btf_id = find_int_btf_id(obj->btf);
4374
/* For an extern function, the dummy_var added earlier
* will be used to replace the vs->type, and
* its name string will be used to fill in
* any missing param names.
*/
4379
const struct btf_type *dummy_var;
4380
4381
dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
4382
for (i = 0; i < obj->nr_extern; i++) {
4383
ext = &obj->externs[i];
4384
if (ext->type != EXT_KSYM)
4385
continue;
4386
pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
4387
i, ext->sym_idx, ext->name);
4388
}
4389
4390
sec = ksym_sec;
4391
n = btf_vlen(sec);
4392
for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
4393
struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4394
struct btf_type *vt;
4395
4396
vt = (void *)btf__type_by_id(obj->btf, vs->type);
4397
ext_name = btf__name_by_offset(obj->btf, vt->name_off);
4398
ext = find_extern_by_name(obj, ext_name);
4399
if (!ext) {
4400
pr_warn("failed to find extern definition for BTF %s '%s'\n",
4401
btf_kind_str(vt), ext_name);
4402
return -ESRCH;
4403
}
4404
if (btf_is_func(vt)) {
4405
const struct btf_type *func_proto;
4406
struct btf_param *param;
4407
int j;
4408
4409
func_proto = btf__type_by_id(obj->btf,
4410
vt->type);
4411
param = btf_params(func_proto);
4412
/* Reuse the dummy_var string if the
* func proto does not have a param name.
*/
4415
for (j = 0; j < btf_vlen(func_proto); j++)
4416
if (param[j].type && !param[j].name_off)
4417
param[j].name_off =
4418
dummy_var->name_off;
4419
vs->type = dummy_var_btf_id;
4420
vt->info &= ~0xffff;
4421
vt->info |= BTF_FUNC_GLOBAL;
4422
} else {
4423
btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4424
vt->type = int_btf_id;
4425
}
4426
vs->offset = off;
4427
vs->size = sizeof(int);
4428
}
4429
sec->size = off;
4430
}
4431
4432
if (kcfg_sec) {
4433
sec = kcfg_sec;
4434
/* for kcfg externs calculate their offsets within a .kconfig map */
4435
off = 0;
4436
for (i = 0; i < obj->nr_extern; i++) {
4437
ext = &obj->externs[i];
4438
if (ext->type != EXT_KCFG)
4439
continue;
4440
4441
ext->kcfg.data_off = roundup(off, ext->kcfg.align);
4442
off = ext->kcfg.data_off + ext->kcfg.sz;
4443
pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
4444
i, ext->sym_idx, ext->kcfg.data_off, ext->name);
4445
}
4446
sec->size = off;
4447
n = btf_vlen(sec);
4448
for (i = 0; i < n; i++) {
4449
struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4450
4451
t = btf__type_by_id(obj->btf, vs->type);
4452
ext_name = btf__name_by_offset(obj->btf, t->name_off);
4453
ext = find_extern_by_name(obj, ext_name);
4454
if (!ext) {
4455
pr_warn("failed to find extern definition for BTF var '%s'\n",
4456
ext_name);
4457
return -ESRCH;
4458
}
4459
btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4460
vs->offset = ext->kcfg.data_off;
4461
}
4462
}
4463
return 0;
4464
}
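
/* Illustrative sketch (declaration shapes only; symbol names are examples):
* .ksyms externs handled above typically come from BPF C code such as
*
*   extern const void bpf_prog_active __ksym;        // EXT_KSYM variable
*   extern void bpf_rcu_read_lock(void) __ksym;      // EXT_KSYM kfunc
*
* while __kconfig externs land in the .kconfig DATASEC (see the kcfg
* handling above).
*/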
4465
4466
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
4467
{
4468
return prog->sec_idx == obj->efile.text_shndx;
4469
}
4470
4471
struct bpf_program *
4472
bpf_object__find_program_by_name(const struct bpf_object *obj,
4473
const char *name)
4474
{
4475
struct bpf_program *prog;
4476
4477
bpf_object__for_each_program(prog, obj) {
4478
if (prog_is_subprog(obj, prog))
4479
continue;
4480
if (!strcmp(prog->name, name))
4481
return prog;
4482
}
4483
return errno = ENOENT, NULL;
4484
}
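
/* Usage sketch (illustrative; the object path and program name are
* hypothetical):
*
*   struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
*   struct bpf_program *prog = bpf_object__find_program_by_name(obj, "handle_tp");
*   if (!prog)   // errno is set to ENOENT when no matching program exists
*       goto cleanup;
*/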
4485
4486
static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
4487
int shndx)
4488
{
4489
switch (obj->efile.secs[shndx].sec_type) {
4490
case SEC_BSS:
4491
case SEC_DATA:
4492
case SEC_RODATA:
4493
return true;
4494
default:
4495
return false;
4496
}
4497
}
4498
4499
static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
4500
int shndx)
4501
{
4502
return shndx == obj->efile.btf_maps_shndx;
4503
}
4504
4505
static enum libbpf_map_type
4506
bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
4507
{
4508
if (shndx == obj->efile.symbols_shndx)
4509
return LIBBPF_MAP_KCONFIG;
4510
4511
switch (obj->efile.secs[shndx].sec_type) {
4512
case SEC_BSS:
4513
return LIBBPF_MAP_BSS;
4514
case SEC_DATA:
4515
return LIBBPF_MAP_DATA;
4516
case SEC_RODATA:
4517
return LIBBPF_MAP_RODATA;
4518
default:
4519
return LIBBPF_MAP_UNSPEC;
4520
}
4521
}
4522
4523
static int bpf_prog_compute_hash(struct bpf_program *prog)
4524
{
4525
struct bpf_insn *purged;
4526
int i, err = 0;
4527
4528
purged = calloc(prog->insns_cnt, BPF_INSN_SZ);
4529
if (!purged)
4530
return -ENOMEM;
4531
4532
/* If relocations have been done, the map_fd needs to be
4533
* discarded for the digest calculation.
4534
*/
4535
for (i = 0; i < prog->insns_cnt; i++) {
4536
purged[i] = prog->insns[i];
4537
if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
4538
(purged[i].src_reg == BPF_PSEUDO_MAP_FD ||
4539
purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
4540
purged[i].imm = 0;
4541
i++;
4542
if (i >= prog->insns_cnt ||
4543
prog->insns[i].code != 0 ||
4544
prog->insns[i].dst_reg != 0 ||
4545
prog->insns[i].src_reg != 0 ||
4546
prog->insns[i].off != 0) {
4547
err = -EINVAL;
4548
goto out;
4549
}
4550
purged[i] = prog->insns[i];
4551
purged[i].imm = 0;
4552
}
4553
}
4554
libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn),
4555
prog->hash);
4556
out:
4557
free(purged);
4558
return err;
4559
}
4560
4561
static int bpf_program__record_reloc(struct bpf_program *prog,
4562
struct reloc_desc *reloc_desc,
4563
__u32 insn_idx, const char *sym_name,
4564
const Elf64_Sym *sym, const Elf64_Rel *rel)
4565
{
4566
struct bpf_insn *insn = &prog->insns[insn_idx];
4567
size_t map_idx, nr_maps = prog->obj->nr_maps;
4568
struct bpf_object *obj = prog->obj;
4569
__u32 shdr_idx = sym->st_shndx;
4570
enum libbpf_map_type type;
4571
const char *sym_sec_name;
4572
struct bpf_map *map;
4573
4574
if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4575
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4576
prog->name, sym_name, insn_idx, insn->code);
4577
return -LIBBPF_ERRNO__RELOC;
4578
}
4579
4580
if (sym_is_extern(sym)) {
4581
int sym_idx = ELF64_R_SYM(rel->r_info);
4582
int i, n = obj->nr_extern;
4583
struct extern_desc *ext;
4584
4585
for (i = 0; i < n; i++) {
4586
ext = &obj->externs[i];
4587
if (ext->sym_idx == sym_idx)
4588
break;
4589
}
4590
if (i >= n) {
4591
pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4592
prog->name, sym_name, sym_idx);
4593
return -LIBBPF_ERRNO__RELOC;
4594
}
4595
pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4596
prog->name, i, ext->name, ext->sym_idx, insn_idx);
4597
if (insn->code == (BPF_JMP | BPF_CALL))
4598
reloc_desc->type = RELO_EXTERN_CALL;
4599
else
4600
reloc_desc->type = RELO_EXTERN_LD64;
4601
reloc_desc->insn_idx = insn_idx;
4602
reloc_desc->ext_idx = i;
4603
return 0;
4604
}
4605
4606
/* sub-program call relocation */
4607
if (is_call_insn(insn)) {
4608
if (insn->src_reg != BPF_PSEUDO_CALL) {
4609
pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4610
return -LIBBPF_ERRNO__RELOC;
4611
}
4612
/* text_shndx can be 0, if no default "main" program exists */
4613
if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4614
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4615
pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4616
prog->name, sym_name, sym_sec_name);
4617
return -LIBBPF_ERRNO__RELOC;
4618
}
4619
if (sym->st_value % BPF_INSN_SZ) {
4620
pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4621
prog->name, sym_name, (size_t)sym->st_value);
4622
return -LIBBPF_ERRNO__RELOC;
4623
}
4624
reloc_desc->type = RELO_CALL;
4625
reloc_desc->insn_idx = insn_idx;
4626
reloc_desc->sym_off = sym->st_value;
4627
return 0;
4628
}
4629
4630
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4631
pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4632
prog->name, sym_name, shdr_idx);
4633
return -LIBBPF_ERRNO__RELOC;
4634
}
4635
4636
/* loading subprog addresses */
4637
if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4638
/* global_func: sym->st_value = offset in the section, insn->imm = 0.
4639
* local_func: sym->st_value = 0, insn->imm = offset in the section.
4640
*/
4641
if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4642
pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4643
prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4644
return -LIBBPF_ERRNO__RELOC;
4645
}
4646
4647
reloc_desc->type = RELO_SUBPROG_ADDR;
4648
reloc_desc->insn_idx = insn_idx;
4649
reloc_desc->sym_off = sym->st_value;
4650
return 0;
4651
}
4652
4653
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4654
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4655
4656
/* arena data relocation */
4657
if (shdr_idx == obj->efile.arena_data_shndx) {
4658
if (obj->arena_map_idx < 0) {
4659
pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n",
4660
prog->name, insn_idx);
4661
return -LIBBPF_ERRNO__RELOC;
4662
}
4663
reloc_desc->type = RELO_DATA;
4664
reloc_desc->insn_idx = insn_idx;
4665
reloc_desc->map_idx = obj->arena_map_idx;
4666
reloc_desc->sym_off = sym->st_value;
4667
4668
map = &obj->maps[obj->arena_map_idx];
4669
pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n",
4670
prog->name, obj->arena_map_idx, map->name, map->sec_idx,
4671
map->sec_offset, insn_idx);
4672
return 0;
4673
}
4674
4675
/* jump table data relocation */
4676
if (shdr_idx == obj->efile.jumptables_data_shndx) {
4677
reloc_desc->type = RELO_INSN_ARRAY;
4678
reloc_desc->insn_idx = insn_idx;
4679
reloc_desc->map_idx = -1;
4680
reloc_desc->sym_off = sym->st_value;
4681
reloc_desc->sym_size = sym->st_size;
4682
return 0;
4683
}
4684
4685
/* generic map reference relocation */
4686
if (type == LIBBPF_MAP_UNSPEC) {
4687
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4688
pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4689
prog->name, sym_name, sym_sec_name);
4690
return -LIBBPF_ERRNO__RELOC;
4691
}
4692
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4693
map = &obj->maps[map_idx];
4694
if (map->libbpf_type != type ||
4695
map->sec_idx != sym->st_shndx ||
4696
map->sec_offset != sym->st_value)
4697
continue;
4698
pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4699
prog->name, map_idx, map->name, map->sec_idx,
4700
map->sec_offset, insn_idx);
4701
break;
4702
}
4703
if (map_idx >= nr_maps) {
4704
pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4705
prog->name, sym_sec_name, (size_t)sym->st_value);
4706
return -LIBBPF_ERRNO__RELOC;
4707
}
4708
reloc_desc->type = RELO_LD64;
4709
reloc_desc->insn_idx = insn_idx;
4710
reloc_desc->map_idx = map_idx;
4711
reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4712
return 0;
4713
}
4714
4715
/* global data map relocation */
4716
if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4717
pr_warn("prog '%s': bad data relo against section '%s'\n",
4718
prog->name, sym_sec_name);
4719
return -LIBBPF_ERRNO__RELOC;
4720
}
4721
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4722
map = &obj->maps[map_idx];
4723
if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4724
continue;
4725
pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4726
prog->name, map_idx, map->name, map->sec_idx,
4727
map->sec_offset, insn_idx);
4728
break;
4729
}
4730
if (map_idx >= nr_maps) {
4731
pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4732
prog->name, sym_sec_name);
4733
return -LIBBPF_ERRNO__RELOC;
4734
}
4735
4736
reloc_desc->type = RELO_DATA;
4737
reloc_desc->insn_idx = insn_idx;
4738
reloc_desc->map_idx = map_idx;
4739
reloc_desc->sym_off = sym->st_value;
4740
return 0;
4741
}
4742
4743
static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4744
{
4745
return insn_idx >= prog->sec_insn_off &&
4746
insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4747
}
4748
4749
static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4750
size_t sec_idx, size_t insn_idx)
4751
{
4752
int l = 0, r = obj->nr_programs - 1, m;
4753
struct bpf_program *prog;
4754
4755
if (!obj->nr_programs)
4756
return NULL;
4757
4758
while (l < r) {
4759
m = l + (r - l + 1) / 2;
4760
prog = &obj->programs[m];
4761
4762
if (prog->sec_idx < sec_idx ||
4763
(prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4764
l = m;
4765
else
4766
r = m - 1;
4767
}
4768
/* matching program could be at index l, but it still might be the
* wrong one, so we need to double-check conditions one last time
*/
4771
prog = &obj->programs[l];
4772
if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4773
return prog;
4774
return NULL;
4775
}
4776
4777
static int
4778
bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4779
{
4780
const char *relo_sec_name, *sec_name;
4781
size_t sec_idx = shdr->sh_info, sym_idx;
4782
struct bpf_program *prog;
4783
struct reloc_desc *relos;
4784
int err, i, nrels;
4785
const char *sym_name;
4786
__u32 insn_idx;
4787
Elf_Scn *scn;
4788
Elf_Data *scn_data;
4789
Elf64_Sym *sym;
4790
Elf64_Rel *rel;
4791
4792
if (sec_idx >= obj->efile.sec_cnt)
4793
return -EINVAL;
4794
4795
scn = elf_sec_by_idx(obj, sec_idx);
4796
scn_data = elf_sec_data(obj, scn);
4797
if (!scn_data)
4798
return -LIBBPF_ERRNO__FORMAT;
4799
4800
relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4801
sec_name = elf_sec_name(obj, scn);
4802
if (!relo_sec_name || !sec_name)
4803
return -EINVAL;
4804
4805
pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4806
relo_sec_name, sec_idx, sec_name);
4807
nrels = shdr->sh_size / shdr->sh_entsize;
4808
4809
for (i = 0; i < nrels; i++) {
4810
rel = elf_rel_by_idx(data, i);
4811
if (!rel) {
4812
pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4813
return -LIBBPF_ERRNO__FORMAT;
4814
}
4815
4816
sym_idx = ELF64_R_SYM(rel->r_info);
4817
sym = elf_sym_by_idx(obj, sym_idx);
4818
if (!sym) {
4819
pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4820
relo_sec_name, sym_idx, i);
4821
return -LIBBPF_ERRNO__FORMAT;
4822
}
4823
4824
if (sym->st_shndx >= obj->efile.sec_cnt) {
4825
pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4826
relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4827
return -LIBBPF_ERRNO__FORMAT;
4828
}
4829
4830
if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4831
pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4832
relo_sec_name, (size_t)rel->r_offset, i);
4833
return -LIBBPF_ERRNO__FORMAT;
4834
}
4835
4836
insn_idx = rel->r_offset / BPF_INSN_SZ;
4837
/* relocations against static functions are recorded as
* relocations against the section that contains the function;
* in such a case, the symbol will be STT_SECTION and sym.st_name
* will point to an empty string (0), so fetch the section name
* instead
*/
4843
if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4844
sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4845
else
4846
sym_name = elf_sym_str(obj, sym->st_name);
4847
sym_name = sym_name ?: "<?";
4848
4849
pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4850
relo_sec_name, i, insn_idx, sym_name);
4851
4852
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4853
if (!prog) {
4854
pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4855
relo_sec_name, i, sec_name, insn_idx);
4856
continue;
4857
}
4858
4859
relos = libbpf_reallocarray(prog->reloc_desc,
4860
prog->nr_reloc + 1, sizeof(*relos));
4861
if (!relos)
4862
return -ENOMEM;
4863
prog->reloc_desc = relos;
4864
4865
/* adjust insn_idx to local BPF program frame of reference */
4866
insn_idx -= prog->sec_insn_off;
4867
err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4868
insn_idx, sym_name, sym, rel);
4869
if (err)
4870
return err;
4871
4872
prog->nr_reloc++;
4873
}
4874
return 0;
4875
}
4876
4877
static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
4878
{
4879
int id;
4880
4881
if (!obj->btf)
4882
return -ENOENT;
4883
4884
/* if it's a BTF-defined map, we don't need to search for type IDs.
* A struct_ops map does not need btf_key_type_id and
* btf_value_type_id either.
*/
4888
if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4889
return 0;
4890
4891
/*
4892
* LLVM annotates global data differently in BTF, that is,
4893
* only as '.data', '.bss' or '.rodata'.
4894
*/
4895
if (!bpf_map__is_internal(map))
4896
return -ENOENT;
4897
4898
id = btf__find_by_name(obj->btf, map->real_name);
4899
if (id < 0)
4900
return id;
4901
4902
map->btf_key_type_id = 0;
4903
map->btf_value_type_id = id;
4904
return 0;
4905
}
4906
4907
static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4908
{
4909
char file[PATH_MAX], buff[4096];
4910
FILE *fp;
4911
__u32 val;
4912
int err;
4913
4914
snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4915
memset(info, 0, sizeof(*info));
4916
4917
fp = fopen(file, "re");
4918
if (!fp) {
4919
err = -errno;
4920
pr_warn("failed to open %s: %s. No procfs support?\n", file,
4921
errstr(err));
4922
return err;
4923
}
4924
4925
while (fgets(buff, sizeof(buff), fp)) {
4926
if (sscanf(buff, "map_type:\t%u", &val) == 1)
4927
info->type = val;
4928
else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4929
info->key_size = val;
4930
else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4931
info->value_size = val;
4932
else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4933
info->max_entries = val;
4934
else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4935
info->map_flags = val;
4936
}
4937
4938
fclose(fp);
4939
4940
return 0;
4941
}
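
/* For reference, the fdinfo lines parsed above look roughly like this
* (values are illustrative):
*
*   map_type:	1
*   key_size:	4
*   value_size:	8
*   max_entries:	1024
*   map_flags:	0x0
*/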
4942
4943
static bool map_is_created(const struct bpf_map *map)
4944
{
4945
return map->obj->state >= OBJ_PREPARED || map->reused;
4946
}
4947
4948
bool bpf_map__autocreate(const struct bpf_map *map)
4949
{
4950
return map->autocreate;
4951
}
4952
4953
int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4954
{
4955
if (map_is_created(map))
4956
return libbpf_err(-EBUSY);
4957
4958
map->autocreate = autocreate;
4959
return 0;
4960
}
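
/* Usage sketch (illustrative; the skeleton and map names are hypothetical):
* opting out of creating an optional map before the object is loaded:
*
*   bpf_map__set_autocreate(skel->maps.optional_stats, false);
*   err = my_skel__load(skel);
*
* Calling bpf_map__set_autocreate() after the object has been loaded (or the
* map reused) returns -EBUSY, as enforced by map_is_created() above.
*/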
4961
4962
int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach)
4963
{
4964
if (!bpf_map__is_struct_ops(map))
4965
return libbpf_err(-EINVAL);
4966
4967
map->autoattach = autoattach;
4968
return 0;
4969
}
4970
4971
bool bpf_map__autoattach(const struct bpf_map *map)
4972
{
4973
return map->autoattach;
4974
}
4975
4976
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4977
{
4978
struct bpf_map_info info;
4979
__u32 len = sizeof(info), name_len;
4980
int new_fd, err;
4981
char *new_name;
4982
4983
memset(&info, 0, len);
4984
err = bpf_map_get_info_by_fd(fd, &info, &len);
4985
if (err && errno == EINVAL)
4986
err = bpf_get_map_info_from_fdinfo(fd, &info);
4987
if (err)
4988
return libbpf_err(err);
4989
4990
name_len = strlen(info.name);
4991
if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4992
new_name = strdup(map->name);
4993
else
4994
new_name = strdup(info.name);
4995
4996
if (!new_name)
4997
return libbpf_err(-errno);
4998
4999
/*
5000
* Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
5001
* This is similar to what we do in ensure_good_fd(), but without
5002
* closing original FD.
5003
*/
5004
new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
5005
if (new_fd < 0) {
5006
err = -errno;
5007
goto err_free_new_name;
5008
}
5009
5010
err = reuse_fd(map->fd, new_fd);
5011
if (err)
5012
goto err_free_new_name;
5013
5014
free(map->name);
5015
5016
map->name = new_name;
5017
map->def.type = info.type;
5018
map->def.key_size = info.key_size;
5019
map->def.value_size = info.value_size;
5020
map->def.max_entries = info.max_entries;
5021
map->def.map_flags = info.map_flags;
5022
map->btf_key_type_id = info.btf_key_type_id;
5023
map->btf_value_type_id = info.btf_value_type_id;
5024
map->reused = true;
5025
map->map_extra = info.map_extra;
5026
5027
return 0;
5028
5029
err_free_new_name:
5030
free(new_name);
5031
return libbpf_err(err);
5032
}
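
/* Usage sketch (illustrative; the pin path is hypothetical): sharing an
* already-pinned map instance instead of creating a new one at load time:
*
*   int pin_fd = bpf_obj_get("/sys/fs/bpf/my_shared_map");
*   if (pin_fd >= 0)
*       err = bpf_map__reuse_fd(map, pin_fd);
*
* bpf_object__reuse_map() below does essentially this automatically for maps
* that have a pin_path set.
*/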
5033
5034
__u32 bpf_map__max_entries(const struct bpf_map *map)
5035
{
5036
return map->def.max_entries;
5037
}
5038
5039
struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
5040
{
5041
if (!bpf_map_type__is_map_in_map(map->def.type))
5042
return errno = EINVAL, NULL;
5043
5044
return map->inner_map;
5045
}
5046
5047
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
5048
{
5049
if (map_is_created(map))
5050
return libbpf_err(-EBUSY);
5051
5052
map->def.max_entries = max_entries;
5053
5054
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
5055
if (map_is_ringbuf(map))
5056
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
5057
5058
return 0;
5059
}
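
/* Note (illustrative): for ringbuf maps max_entries is auto-adjusted above to
* a power-of-2 multiple of the page size, so e.g. requesting 1 on a system
* with 4KB pages ends up as max_entries == 4096.
*/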
5060
5061
static int bpf_object_prepare_token(struct bpf_object *obj)
5062
{
5063
const char *bpffs_path;
5064
int bpffs_fd = -1, token_fd, err;
5065
bool mandatory;
5066
enum libbpf_print_level level;
5067
5068
/* token is explicitly prevented */
5069
if (obj->token_path && obj->token_path[0] == '\0') {
5070
pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
5071
return 0;
5072
}
5073
5074
mandatory = obj->token_path != NULL;
5075
level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
5076
5077
bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
5078
bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
5079
if (bpffs_fd < 0) {
5080
err = -errno;
5081
__pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n",
5082
obj->name, errstr(err), bpffs_path,
5083
mandatory ? "" : ", skipping optional step...");
5084
return mandatory ? err : 0;
5085
}
5086
5087
token_fd = bpf_token_create(bpffs_fd, 0);
5088
close(bpffs_fd);
5089
if (token_fd < 0) {
5090
if (!mandatory && token_fd == -ENOENT) {
5091
pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
5092
obj->name, bpffs_path);
5093
return 0;
5094
}
5095
__pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
5096
obj->name, token_fd, bpffs_path,
5097
mandatory ? "" : ", skipping optional step...");
5098
return mandatory ? token_fd : 0;
5099
}
5100
5101
obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
5102
if (!obj->feat_cache) {
5103
close(token_fd);
5104
return -ENOMEM;
5105
}
5106
5107
obj->token_fd = token_fd;
5108
obj->feat_cache->token_fd = token_fd;
5109
5110
return 0;
5111
}
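
/* Background sketch (not libbpf code; the mount options shown are just an
* example): BPF token creation only succeeds against a BPF FS instance
* mounted with delegation options, e.g.:
*
*   mount -t bpf bpffs /sys/fs/bpf \
*         -o delegate_cmds=any,delegate_maps=any,delegate_progs=any,delegate_attachs=any
*
* otherwise bpf_token_create() fails (with -ENOENT treated as a soft error
* above when the token is optional).
*/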
5112
5113
static int
5114
bpf_object__probe_loading(struct bpf_object *obj)
5115
{
5116
struct bpf_insn insns[] = {
5117
BPF_MOV64_IMM(BPF_REG_0, 0),
5118
BPF_EXIT_INSN(),
5119
};
5120
int ret, insn_cnt = ARRAY_SIZE(insns);
5121
LIBBPF_OPTS(bpf_prog_load_opts, opts,
5122
.token_fd = obj->token_fd,
5123
.prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
5124
);
5125
5126
if (obj->gen_loader)
5127
return 0;
5128
5129
ret = bump_rlimit_memlock();
5130
if (ret)
5131
pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n",
5132
errstr(ret));
5133
5134
/* make sure basic loading works */
5135
ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
5136
if (ret < 0)
5137
ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
5138
if (ret < 0) {
5139
ret = errno;
5140
pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n",
5141
__func__, errstr(ret));
5142
return -ret;
5143
}
5144
close(ret);
5145
5146
return 0;
5147
}
5148
5149
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
5150
{
5151
if (obj->gen_loader)
5152
/* To generate the loader program, assume the latest kernel
* to avoid doing extra prog_load and map_create syscalls.
*/
5155
return true;
5156
5157
if (obj->token_fd)
5158
return feat_supported(obj->feat_cache, feat_id);
5159
5160
return feat_supported(NULL, feat_id);
5161
}
5162
5163
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
5164
{
5165
struct bpf_map_info map_info;
5166
__u32 map_info_len = sizeof(map_info);
5167
int err;
5168
5169
memset(&map_info, 0, map_info_len);
5170
err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
5171
if (err && errno == EINVAL)
5172
err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
5173
if (err) {
5174
pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
5175
errstr(err));
5176
return false;
5177
}
5178
5179
/*
* bpf_map_get_info_by_fd() for DEVMAP will always return flags with
* BPF_F_RDONLY_PROG set, but it generally is not set at map creation time.
* Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from
* bpf_map_get_info_by_fd() when checking for compatibility with an
* existing DEVMAP.
*/
5186
if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH)
5187
map_info.map_flags &= ~BPF_F_RDONLY_PROG;
5188
5189
return (map_info.type == map->def.type &&
5190
map_info.key_size == map->def.key_size &&
5191
map_info.value_size == map->def.value_size &&
5192
map_info.max_entries == map->def.max_entries &&
5193
map_info.map_flags == map->def.map_flags &&
5194
map_info.map_extra == map->map_extra);
5195
}
5196
5197
static int
5198
bpf_object__reuse_map(struct bpf_map *map)
5199
{
5200
int err, pin_fd;
5201
5202
pin_fd = bpf_obj_get(map->pin_path);
5203
if (pin_fd < 0) {
5204
err = -errno;
5205
if (err == -ENOENT) {
5206
pr_debug("found no pinned map to reuse at '%s'\n",
5207
map->pin_path);
5208
return 0;
5209
}
5210
5211
pr_warn("couldn't retrieve pinned map '%s': %s\n",
5212
map->pin_path, errstr(err));
5213
return err;
5214
}
5215
5216
if (!map_is_reuse_compat(map, pin_fd)) {
5217
pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
5218
map->pin_path);
5219
close(pin_fd);
5220
return -EINVAL;
5221
}
5222
5223
err = bpf_map__reuse_fd(map, pin_fd);
5224
close(pin_fd);
5225
if (err)
5226
return err;
5227
5228
map->pinned = true;
5229
pr_debug("reused pinned map at '%s'\n", map->pin_path);
5230
5231
return 0;
5232
}
5233
5234
static int
5235
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
5236
{
5237
enum libbpf_map_type map_type = map->libbpf_type;
5238
int err, zero = 0;
5239
size_t mmap_sz;
5240
5241
if (obj->gen_loader) {
5242
bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
5243
map->mmaped, map->def.value_size);
5244
if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
5245
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
5246
return 0;
5247
}
5248
5249
err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
5250
if (err) {
5251
err = -errno;
5252
pr_warn("map '%s': failed to set initial contents: %s\n",
5253
bpf_map__name(map), errstr(err));
5254
return err;
5255
}
5256
5257
/* Freeze .rodata and .kconfig map as read-only from syscall side. */
5258
if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
5259
err = bpf_map_freeze(map->fd);
5260
if (err) {
5261
err = -errno;
5262
pr_warn("map '%s': failed to freeze as read-only: %s\n",
5263
bpf_map__name(map), errstr(err));
5264
return err;
5265
}
5266
}
5267
5268
/* Remap the anonymous mmap()-ed "map initialization image" as
* BPF map-backed mmap()-ed memory, preserving the same
* memory address. This will cause the kernel to change the process'
* page table to point to a different piece of kernel memory,
* but from the userspace point of view the memory address (and its
* contents, being identical at this point) will stay the
* same. This mapping will be released by bpf_object__close()
* as part of the normal clean up procedure.
*/
5277
mmap_sz = bpf_map_mmap_sz(map);
5278
if (map->def.map_flags & BPF_F_MMAPABLE) {
5279
void *mmaped;
5280
int prot;
5281
5282
if (map->def.map_flags & BPF_F_RDONLY_PROG)
5283
prot = PROT_READ;
5284
else
5285
prot = PROT_READ | PROT_WRITE;
5286
mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0);
5287
if (mmaped == MAP_FAILED) {
5288
err = -errno;
5289
pr_warn("map '%s': failed to re-mmap() contents: %s\n",
5290
bpf_map__name(map), errstr(err));
5291
return err;
5292
}
5293
map->mmaped = mmaped;
5294
} else if (map->mmaped) {
5295
munmap(map->mmaped, mmap_sz);
5296
map->mmaped = NULL;
5297
}
5298
5299
return 0;
5300
}
5301
5302
static void bpf_map__destroy(struct bpf_map *map);
5303
5304
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
5305
{
5306
LIBBPF_OPTS(bpf_map_create_opts, create_attr);
5307
struct bpf_map_def *def = &map->def;
5308
const char *map_name = NULL;
5309
int err = 0, map_fd;
5310
5311
if (kernel_supports(obj, FEAT_PROG_NAME))
5312
map_name = map->name;
5313
create_attr.map_ifindex = map->map_ifindex;
5314
create_attr.map_flags = def->map_flags;
5315
create_attr.numa_node = map->numa_node;
5316
create_attr.map_extra = map->map_extra;
5317
create_attr.token_fd = obj->token_fd;
5318
if (obj->token_fd)
5319
create_attr.map_flags |= BPF_F_TOKEN_FD;
5320
if (map->excl_prog) {
5321
err = bpf_prog_compute_hash(map->excl_prog);
5322
if (err)
5323
return err;
5324
5325
create_attr.excl_prog_hash = map->excl_prog->hash;
5326
create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH;
5327
}
5328
5329
if (bpf_map__is_struct_ops(map)) {
5330
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
5331
if (map->mod_btf_fd >= 0) {
5332
create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
5333
create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
5334
}
5335
}
5336
5337
if (obj->btf && btf__fd(obj->btf) >= 0) {
5338
create_attr.btf_fd = btf__fd(obj->btf);
5339
create_attr.btf_key_type_id = map->btf_key_type_id;
5340
create_attr.btf_value_type_id = map->btf_value_type_id;
5341
}
5342
5343
if (bpf_map_type__is_map_in_map(def->type)) {
5344
if (map->inner_map) {
5345
err = map_set_def_max_entries(map->inner_map);
5346
if (err)
5347
return err;
5348
err = bpf_object__create_map(obj, map->inner_map, true);
5349
if (err) {
5350
pr_warn("map '%s': failed to create inner map: %s\n",
5351
map->name, errstr(err));
5352
return err;
5353
}
5354
map->inner_map_fd = map->inner_map->fd;
5355
}
5356
if (map->inner_map_fd >= 0)
5357
create_attr.inner_map_fd = map->inner_map_fd;
5358
}
5359
5360
switch (def->type) {
5361
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5362
case BPF_MAP_TYPE_CGROUP_ARRAY:
5363
case BPF_MAP_TYPE_STACK_TRACE:
5364
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5365
case BPF_MAP_TYPE_HASH_OF_MAPS:
5366
case BPF_MAP_TYPE_DEVMAP:
5367
case BPF_MAP_TYPE_DEVMAP_HASH:
5368
case BPF_MAP_TYPE_CPUMAP:
5369
case BPF_MAP_TYPE_XSKMAP:
5370
case BPF_MAP_TYPE_SOCKMAP:
5371
case BPF_MAP_TYPE_SOCKHASH:
5372
case BPF_MAP_TYPE_QUEUE:
5373
case BPF_MAP_TYPE_STACK:
5374
case BPF_MAP_TYPE_ARENA:
5375
create_attr.btf_fd = 0;
5376
create_attr.btf_key_type_id = 0;
5377
create_attr.btf_value_type_id = 0;
5378
map->btf_key_type_id = 0;
5379
map->btf_value_type_id = 0;
5380
break;
5381
case BPF_MAP_TYPE_STRUCT_OPS:
5382
create_attr.btf_value_type_id = 0;
5383
break;
5384
default:
5385
break;
5386
}
5387
5388
if (obj->gen_loader) {
5389
bpf_gen__map_create(obj->gen_loader, def->type, map_name,
5390
def->key_size, def->value_size, def->max_entries,
5391
&create_attr, is_inner ? -1 : map - obj->maps);
5392
/* We keep pretending we have a valid FD to pass various fd >= 0
* checks by just keeping original placeholder FDs in place.
* See bpf_object__add_map() comment.
* This placeholder fd will not be used with any syscall and
* will be reset to -1 eventually.
*/
5398
map_fd = map->fd;
5399
} else {
5400
map_fd = bpf_map_create(def->type, map_name,
5401
def->key_size, def->value_size,
5402
def->max_entries, &create_attr);
5403
}
5404
if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
5405
err = -errno;
5406
pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n",
5407
map->name, errstr(err));
5408
create_attr.btf_fd = 0;
5409
create_attr.btf_key_type_id = 0;
5410
create_attr.btf_value_type_id = 0;
5411
map->btf_key_type_id = 0;
5412
map->btf_value_type_id = 0;
5413
map_fd = bpf_map_create(def->type, map_name,
5414
def->key_size, def->value_size,
5415
def->max_entries, &create_attr);
5416
}
5417
5418
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5419
if (obj->gen_loader)
5420
map->inner_map->fd = -1;
5421
bpf_map__destroy(map->inner_map);
5422
zfree(&map->inner_map);
5423
}
5424
5425
if (map_fd < 0)
5426
return map_fd;
5427
5428
/* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
5429
if (map->fd == map_fd)
5430
return 0;
5431
5432
/* Keep the placeholder FD value but now point it to the BPF map object.
* This way everything that relied on this map's FD (e.g., relocated
* ldimm64 instructions) will stay valid and won't need adjustments.
* map->fd stays valid but now points to what map_fd points to.
*/
5437
return reuse_fd(map->fd, map_fd);
5438
}
5439
5440
static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5441
{
5442
const struct bpf_map *targ_map;
5443
unsigned int i;
5444
int fd, err = 0;
5445
5446
for (i = 0; i < map->init_slots_sz; i++) {
5447
if (!map->init_slots[i])
5448
continue;
5449
5450
targ_map = map->init_slots[i];
5451
fd = targ_map->fd;
5452
5453
if (obj->gen_loader) {
5454
bpf_gen__populate_outer_map(obj->gen_loader,
5455
map - obj->maps, i,
5456
targ_map - obj->maps);
5457
} else {
5458
err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5459
}
5460
if (err) {
5461
err = -errno;
5462
pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n",
5463
map->name, i, targ_map->name, fd, errstr(err));
5464
return err;
5465
}
5466
pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5467
map->name, i, targ_map->name, fd);
5468
}
5469
5470
zfree(&map->init_slots);
5471
map->init_slots_sz = 0;
5472
5473
return 0;
5474
}
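
/* Illustrative BPF-side declaration (hypothetical map names) that results in
* init_slots being populated and then filled in by the loop above:
*
*   struct {
*       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
*       __uint(max_entries, 2);
*       __array(values, struct inner_map);
*   } outer_map SEC(".maps") = {
*       .values = { [0] = &inner_map_a, [1] = &inner_map_b },
*   };
*/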
5475
5476
static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5477
{
5478
const struct bpf_program *targ_prog;
5479
unsigned int i;
5480
int fd, err;
5481
5482
if (obj->gen_loader)
5483
return -ENOTSUP;
5484
5485
for (i = 0; i < map->init_slots_sz; i++) {
5486
if (!map->init_slots[i])
5487
continue;
5488
5489
targ_prog = map->init_slots[i];
5490
fd = bpf_program__fd(targ_prog);
5491
5492
err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5493
if (err) {
5494
err = -errno;
5495
pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n",
5496
map->name, i, targ_prog->name, fd, errstr(err));
5497
return err;
5498
}
5499
pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5500
map->name, i, targ_prog->name, fd);
5501
}
5502
5503
zfree(&map->init_slots);
5504
map->init_slots_sz = 0;
5505
5506
return 0;
5507
}
5508
5509
static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5510
{
5511
struct bpf_map *map;
5512
int i, err;
5513
5514
for (i = 0; i < obj->nr_maps; i++) {
5515
map = &obj->maps[i];
5516
5517
if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5518
continue;
5519
5520
err = init_prog_array_slots(obj, map);
5521
if (err < 0)
5522
return err;
5523
}
5524
return 0;
5525
}
5526
5527
static int map_set_def_max_entries(struct bpf_map *map)
5528
{
5529
if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5530
int nr_cpus;
5531
5532
nr_cpus = libbpf_num_possible_cpus();
5533
if (nr_cpus < 0) {
5534
pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5535
map->name, nr_cpus);
5536
return nr_cpus;
5537
}
5538
pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5539
map->def.max_entries = nr_cpus;
5540
}
5541
5542
return 0;
5543
}
5544
5545
static int
5546
bpf_object__create_maps(struct bpf_object *obj)
5547
{
5548
struct bpf_map *map;
5549
unsigned int i, j;
5550
int err;
5551
bool retried;
5552
5553
for (i = 0; i < obj->nr_maps; i++) {
5554
map = &obj->maps[i];
5555
5556
/* To support old kernels, we skip creating global data maps
* (.rodata, .data, .kconfig, etc); later on, during program
* loading, if we detect that at least one of the to-be-loaded
* programs is referencing any global data map, we'll error
* out with the program name and relocation index logged.
* This approach accommodates Clang emitting
* unnecessary .rodata.str1.1 sections for string literals,
* and it also allows CO-RE applications that use
* global variables in some BPF programs, but not others.
* If those global variable-using programs are not loaded at
* runtime due to bpf_program__set_autoload(prog, false),
* bpf_object loading will succeed just fine even on old
* kernels.
*/
5570
if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5571
map->autocreate = false;
5572
5573
if (!map->autocreate) {
5574
pr_debug("map '%s': skipped auto-creating...\n", map->name);
5575
continue;
5576
}
5577
5578
err = map_set_def_max_entries(map);
5579
if (err)
5580
goto err_out;
5581
5582
retried = false;
5583
retry:
5584
if (map->pin_path) {
5585
err = bpf_object__reuse_map(map);
5586
if (err) {
5587
pr_warn("map '%s': error reusing pinned map\n",
5588
map->name);
5589
goto err_out;
5590
}
5591
if (retried && map->fd < 0) {
5592
pr_warn("map '%s': cannot find pinned map\n",
5593
map->name);
5594
err = -ENOENT;
5595
goto err_out;
5596
}
5597
}
5598
5599
if (map->reused) {
5600
pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5601
map->name, map->fd);
5602
} else {
5603
err = bpf_object__create_map(obj, map, false);
5604
if (err)
5605
goto err_out;
5606
5607
pr_debug("map '%s': created successfully, fd=%d\n",
5608
map->name, map->fd);
5609
5610
if (bpf_map__is_internal(map)) {
5611
err = bpf_object__populate_internal_map(obj, map);
5612
if (err < 0)
5613
goto err_out;
5614
} else if (map->def.type == BPF_MAP_TYPE_ARENA) {
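/* Arena maps expose their contents through a user-space mapping; a
 * non-zero map_extra carries the requested fixed mapping address, which
 * we honor with MAP_FIXED, otherwise the kernel picks the address.
 */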
5615
map->mmaped = mmap((void *)(long)map->map_extra,
5616
bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
5617
map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
5618
map->fd, 0);
5619
if (map->mmaped == MAP_FAILED) {
5620
err = -errno;
5621
map->mmaped = NULL;
5622
pr_warn("map '%s': failed to mmap arena: %s\n",
5623
map->name, errstr(err));
5624
return err;
5625
}
5626
if (obj->arena_data) {
5627
memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
5628
zfree(&obj->arena_data);
5629
}
5630
}
5631
if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5632
err = init_map_in_map_slots(obj, map);
5633
if (err < 0)
5634
goto err_out;
5635
}
5636
}
5637
5638
if (map->pin_path && !map->pinned) {
5639
err = bpf_map__pin(map, NULL);
5640
if (err) {
5641
if (!retried && err == -EEXIST) {
5642
retried = true;
5643
goto retry;
5644
}
5645
pr_warn("map '%s': failed to auto-pin at '%s': %s\n",
5646
map->name, map->pin_path, errstr(err));
5647
goto err_out;
5648
}
5649
}
5650
}
5651
5652
return 0;
5653
5654
err_out:
5655
pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err));
5656
pr_perm_msg(err);
5657
for (j = 0; j < i; j++)
5658
zclose(obj->maps[j].fd);
5659
return err;
5660
}
5661
5662
static bool bpf_core_is_flavor_sep(const char *s)
5663
{
5664
/* check X___Y name pattern, where X and Y are not underscores */
5665
return s[0] != '_' && /* X */
5666
s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
5667
s[4] != '_'; /* Y */
5668
}
5669
5670
/* Given 'some_struct_name___with_flavor' return the length of a name prefix
5671
* before last triple underscore. Struct name part after last triple
5672
* underscore is ignored by BPF CO-RE relocation during relocation matching.
5673
*/
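/* For example, "task_struct___suse15" has an essential name length of
 * strlen("task_struct"): the "___suse15" flavor suffix is ignored during
 * candidate matching, while a name without a triple underscore is used
 * in full.
 */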
5674
size_t bpf_core_essential_name_len(const char *name)
5675
{
5676
size_t n = strlen(name);
5677
int i;
5678
5679
for (i = n - 5; i >= 0; i--) {
5680
if (bpf_core_is_flavor_sep(name + i))
5681
return i + 1;
5682
}
5683
return n;
5684
}
5685
5686
void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5687
{
5688
if (!cands)
5689
return;
5690
5691
free(cands->cands);
5692
free(cands);
5693
}
5694
5695
int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5696
size_t local_essent_len,
5697
const struct btf *targ_btf,
5698
const char *targ_btf_name,
5699
int targ_start_id,
5700
struct bpf_core_cand_list *cands)
5701
{
5702
struct bpf_core_cand *new_cands, *cand;
5703
const struct btf_type *t, *local_t;
5704
const char *targ_name, *local_name;
5705
size_t targ_essent_len;
5706
int n, i;
5707
5708
local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5709
local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5710
5711
n = btf__type_cnt(targ_btf);
5712
for (i = targ_start_id; i < n; i++) {
5713
t = btf__type_by_id(targ_btf, i);
5714
if (!btf_kind_core_compat(t, local_t))
5715
continue;
5716
5717
targ_name = btf__name_by_offset(targ_btf, t->name_off);
5718
if (str_is_empty(targ_name))
5719
continue;
5720
5721
targ_essent_len = bpf_core_essential_name_len(targ_name);
5722
if (targ_essent_len != local_essent_len)
5723
continue;
5724
5725
if (strncmp(local_name, targ_name, local_essent_len) != 0)
5726
continue;
5727
5728
pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5729
local_cand->id, btf_kind_str(local_t),
5730
local_name, i, btf_kind_str(t), targ_name,
5731
targ_btf_name);
5732
new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5733
sizeof(*cands->cands));
5734
if (!new_cands)
5735
return -ENOMEM;
5736
5737
cand = &new_cands[cands->len];
5738
cand->btf = targ_btf;
5739
cand->id = i;
5740
5741
cands->cands = new_cands;
5742
cands->len++;
5743
}
5744
return 0;
5745
}
5746
5747
static int load_module_btfs(struct bpf_object *obj)
5748
{
5749
struct bpf_btf_info info;
5750
struct module_btf *mod_btf;
5751
struct btf *btf;
5752
char name[64];
5753
__u32 id = 0, len;
5754
int err, fd;
5755
5756
if (obj->btf_modules_loaded)
5757
return 0;
5758
5759
if (obj->gen_loader)
5760
return 0;
5761
5762
/* don't do this again, even if we find no module BTFs */
5763
obj->btf_modules_loaded = true;
5764
5765
/* kernel too old to support module BTFs */
5766
if (!kernel_supports(obj, FEAT_MODULE_BTF))
5767
return 0;
5768
5769
while (true) {
5770
err = bpf_btf_get_next_id(id, &id);
5771
if (err && errno == ENOENT)
5772
return 0;
5773
if (err && errno == EPERM) {
5774
pr_debug("skipping module BTFs loading, missing privileges\n");
5775
return 0;
5776
}
5777
if (err) {
5778
err = -errno;
5779
pr_warn("failed to iterate BTF objects: %s\n", errstr(err));
5780
return err;
5781
}
5782
5783
fd = bpf_btf_get_fd_by_id(id);
5784
if (fd < 0) {
5785
if (errno == ENOENT)
5786
continue; /* expected race: BTF was unloaded */
5787
err = -errno;
5788
pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err));
5789
return err;
5790
}
5791
5792
len = sizeof(info);
5793
memset(&info, 0, sizeof(info));
5794
info.name = ptr_to_u64(name);
5795
info.name_len = sizeof(name);
5796
5797
err = bpf_btf_get_info_by_fd(fd, &info, &len);
5798
if (err) {
5799
err = -errno;
5800
pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err));
5801
goto err_out;
5802
}
5803
5804
/* ignore non-module BTFs */
5805
if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5806
close(fd);
5807
continue;
5808
}
5809
5810
btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5811
err = libbpf_get_error(btf);
5812
if (err) {
5813
pr_warn("failed to load module [%s]'s BTF object #%d: %s\n",
5814
name, id, errstr(err));
5815
goto err_out;
5816
}
5817
5818
err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5819
sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5820
if (err)
5821
goto err_out;
5822
5823
mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5824
5825
mod_btf->btf = btf;
5826
mod_btf->id = id;
5827
mod_btf->fd = fd;
5828
mod_btf->name = strdup(name);
5829
if (!mod_btf->name) {
5830
err = -ENOMEM;
5831
goto err_out;
5832
}
5833
continue;
5834
5835
err_out:
5836
close(fd);
5837
return err;
5838
}
5839
5840
return 0;
5841
}
5842
5843
static struct bpf_core_cand_list *
5844
bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5845
{
5846
struct bpf_core_cand local_cand = {};
5847
struct bpf_core_cand_list *cands;
5848
const struct btf *main_btf;
5849
const struct btf_type *local_t;
5850
const char *local_name;
5851
size_t local_essent_len;
5852
int err, i;
5853
5854
local_cand.btf = local_btf;
5855
local_cand.id = local_type_id;
5856
local_t = btf__type_by_id(local_btf, local_type_id);
5857
if (!local_t)
5858
return ERR_PTR(-EINVAL);
5859
5860
local_name = btf__name_by_offset(local_btf, local_t->name_off);
5861
if (str_is_empty(local_name))
5862
return ERR_PTR(-EINVAL);
5863
local_essent_len = bpf_core_essential_name_len(local_name);
5864
5865
cands = calloc(1, sizeof(*cands));
5866
if (!cands)
5867
return ERR_PTR(-ENOMEM);
5868
5869
/* Attempt to find target candidates in vmlinux BTF first */
5870
main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5871
err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5872
if (err)
5873
goto err_out;
5874
5875
/* if vmlinux BTF has any candidate, don't go for module BTFs */
5876
if (cands->len)
5877
return cands;
5878
5879
/* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5880
if (obj->btf_vmlinux_override)
5881
return cands;
5882
5883
/* now look through module BTFs, trying to still find candidates */
5884
err = load_module_btfs(obj);
5885
if (err)
5886
goto err_out;
5887
5888
for (i = 0; i < obj->btf_module_cnt; i++) {
5889
err = bpf_core_add_cands(&local_cand, local_essent_len,
5890
obj->btf_modules[i].btf,
5891
obj->btf_modules[i].name,
5892
btf__type_cnt(obj->btf_vmlinux),
5893
cands);
5894
if (err)
5895
goto err_out;
5896
}
5897
5898
return cands;
5899
err_out:
5900
bpf_core_free_cands(cands);
5901
return ERR_PTR(err);
5902
}
5903
5904
/* Check local and target types for compatibility. This check is used for
5905
* type-based CO-RE relocations and follows slightly different rules than
5906
* field-based relocations. This function assumes that root types were already
5907
* checked for name match. Beyond that initial root-level name check, names
5908
* are completely ignored. Compatibility rules are as follows:
5909
* - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5910
* kind should match for local and target types (i.e., STRUCT is not
5911
* compatible with UNION);
5912
* - for ENUMs, the size is ignored;
5913
* - for INT, size and signedness are ignored;
5914
* - for ARRAY, dimensionality is ignored, element types are checked for
5915
* compatibility recursively;
5916
* - CONST/VOLATILE/RESTRICT modifiers are ignored;
5917
* - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5918
* - FUNC_PROTOs are compatible if they have compatible signature: same
5919
* number of input args and compatible return and argument types.
5920
* These rules are not set in stone and probably will be adjusted as we get
5921
* more experience with using BPF CO-RE relocations.
5922
*/
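/* For instance, under the rules above "int arr[4]" is compatible with
 * "int arr[16]" (array dimensionality is ignored) and "long" is compatible
 * with "unsigned int" (INT size and signedness are ignored), but a STRUCT
 * never matches a UNION.
 */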
5923
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5924
const struct btf *targ_btf, __u32 targ_id)
5925
{
5926
return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5927
}
5928
5929
int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5930
const struct btf *targ_btf, __u32 targ_id)
5931
{
5932
return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5933
}
5934
5935
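/* The candidate cache below is keyed by the local BTF type ID, which is
 * already a small unique integer, so an identity hash and plain equality
 * comparison are sufficient.
 */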
static size_t bpf_core_hash_fn(const long key, void *ctx)
5936
{
5937
return key;
5938
}
5939
5940
static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5941
{
5942
return k1 == k2;
5943
}
5944
5945
static int record_relo_core(struct bpf_program *prog,
5946
const struct bpf_core_relo *core_relo, int insn_idx)
5947
{
5948
struct reloc_desc *relos, *relo;
5949
5950
relos = libbpf_reallocarray(prog->reloc_desc,
5951
prog->nr_reloc + 1, sizeof(*relos));
5952
if (!relos)
5953
return -ENOMEM;
5954
relo = &relos[prog->nr_reloc];
5955
relo->type = RELO_CORE;
5956
relo->insn_idx = insn_idx;
5957
relo->core_relo = core_relo;
5958
prog->reloc_desc = relos;
5959
prog->nr_reloc++;
5960
return 0;
5961
}
5962
5963
static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5964
{
5965
struct reloc_desc *relo;
5966
int i;
5967
5968
for (i = 0; i < prog->nr_reloc; i++) {
5969
relo = &prog->reloc_desc[i];
5970
if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5971
continue;
5972
5973
return relo->core_relo;
5974
}
5975
5976
return NULL;
5977
}
5978
5979
static int bpf_core_resolve_relo(struct bpf_program *prog,
5980
const struct bpf_core_relo *relo,
5981
int relo_idx,
5982
const struct btf *local_btf,
5983
struct hashmap *cand_cache,
5984
struct bpf_core_relo_res *targ_res)
5985
{
5986
struct bpf_core_spec specs_scratch[3] = {};
5987
struct bpf_core_cand_list *cands = NULL;
5988
const char *prog_name = prog->name;
5989
const struct btf_type *local_type;
5990
const char *local_name;
5991
__u32 local_id = relo->type_id;
5992
int err;
5993
5994
local_type = btf__type_by_id(local_btf, local_id);
5995
if (!local_type)
5996
return -EINVAL;
5997
5998
local_name = btf__name_by_offset(local_btf, local_type->name_off);
5999
if (!local_name)
6000
return -EINVAL;
6001
6002
if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
6003
!hashmap__find(cand_cache, local_id, &cands)) {
6004
cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
6005
if (IS_ERR(cands)) {
6006
pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
6007
prog_name, relo_idx, local_id, btf_kind_str(local_type),
6008
local_name, PTR_ERR(cands));
6009
return PTR_ERR(cands);
6010
}
6011
err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
6012
if (err) {
6013
bpf_core_free_cands(cands);
6014
return err;
6015
}
6016
}
6017
6018
return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
6019
targ_res);
6020
}
6021
6022
static int
6023
bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
6024
{
6025
const struct btf_ext_info_sec *sec;
6026
struct bpf_core_relo_res targ_res;
6027
const struct bpf_core_relo *rec;
6028
const struct btf_ext_info *seg;
6029
struct hashmap_entry *entry;
6030
struct hashmap *cand_cache = NULL;
6031
struct bpf_program *prog;
6032
struct bpf_insn *insn;
6033
const char *sec_name;
6034
int i, err = 0, insn_idx, sec_idx, sec_num;
6035
6036
if (obj->btf_ext->core_relo_info.len == 0)
6037
return 0;
6038
6039
if (targ_btf_path) {
6040
obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
6041
err = libbpf_get_error(obj->btf_vmlinux_override);
6042
if (err) {
6043
pr_warn("failed to parse target BTF: %s\n", errstr(err));
6044
return err;
6045
}
6046
}
6047
6048
cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
6049
if (IS_ERR(cand_cache)) {
6050
err = PTR_ERR(cand_cache);
6051
goto out;
6052
}
6053
6054
seg = &obj->btf_ext->core_relo_info;
6055
sec_num = 0;
6056
for_each_btf_ext_sec(seg, sec) {
6057
sec_idx = seg->sec_idxs[sec_num];
6058
sec_num++;
6059
6060
sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6061
if (str_is_empty(sec_name)) {
6062
err = -EINVAL;
6063
goto out;
6064
}
6065
6066
pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
6067
6068
for_each_btf_ext_rec(seg, sec, i, rec) {
6069
if (rec->insn_off % BPF_INSN_SZ)
6070
return -EINVAL;
6071
insn_idx = rec->insn_off / BPF_INSN_SZ;
6072
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
6073
if (!prog) {
6074
/* When __weak subprog is "overridden" by another instance
6075
* of the subprog from a different object file, linker still
6076
* appends all the .BTF.ext info that used to belong to that
6077
* eliminated subprogram.
6078
* This is similar to what x86-64 linker does for relocations.
6079
* So just ignore such relocations just like we ignore
6080
* subprog instructions when discovering subprograms.
6081
*/
6082
pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
6083
sec_name, i, insn_idx);
6084
continue;
6085
}
6086
/* no need to apply CO-RE relocation if the program is
6087
* not going to be loaded
6088
*/
6089
if (!prog->autoload)
6090
continue;
6091
6092
/* adjust insn_idx from section frame of reference to the local
6093
* program's frame of reference; (sub-)program code is not yet
6094
* relocated, so it's enough to just subtract in-section offset
6095
*/
6096
insn_idx = insn_idx - prog->sec_insn_off;
6097
if (insn_idx >= prog->insns_cnt)
6098
return -EINVAL;
6099
insn = &prog->insns[insn_idx];
6100
6101
err = record_relo_core(prog, rec, insn_idx);
6102
if (err) {
6103
pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n",
6104
prog->name, i, errstr(err));
6105
goto out;
6106
}
6107
6108
if (prog->obj->gen_loader)
6109
continue;
6110
6111
err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
6112
if (err) {
6113
pr_warn("prog '%s': relo #%d: failed to relocate: %s\n",
6114
prog->name, i, errstr(err));
6115
goto out;
6116
}
6117
6118
err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
6119
if (err) {
6120
pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n",
6121
prog->name, i, insn_idx, errstr(err));
6122
goto out;
6123
}
6124
}
6125
}
6126
6127
out:
6128
/* obj->btf_vmlinux and module BTFs are freed after object load */
6129
btf__free(obj->btf_vmlinux_override);
6130
obj->btf_vmlinux_override = NULL;
6131
6132
if (!IS_ERR_OR_NULL(cand_cache)) {
6133
hashmap__for_each_entry(cand_cache, entry, i) {
6134
bpf_core_free_cands(entry->pvalue);
6135
}
6136
hashmap__free(cand_cache);
6137
}
6138
return err;
6139
}
6140
6141
/* base map load ldimm64 special constant, used also for log fixup logic */
6142
#define POISON_LDIMM64_MAP_BASE 2001000000
6143
#define POISON_LDIMM64_MAP_PFX "200100"
6144
6145
static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
6146
int insn_idx, struct bpf_insn *insn,
6147
int map_idx, const struct bpf_map *map)
6148
{
6149
int i;
6150
6151
pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
6152
prog->name, relo_idx, insn_idx, map_idx, map->name);
6153
6154
/* we turn single ldimm64 into two identical invalid calls */
6155
for (i = 0; i < 2; i++) {
6156
insn->code = BPF_JMP | BPF_CALL;
6157
insn->dst_reg = 0;
6158
insn->src_reg = 0;
6159
insn->off = 0;
6160
/* if this instruction is reachable (not dead code),
6161
* verifier will complain with something like:
6162
* invalid func unknown#2001000123
6163
* where lower 123 is map index into obj->maps[] array
6164
*/
6165
insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
6166
6167
insn++;
6168
}
6169
}
6170
6171
/* unresolved kfunc call special constant, used also for log fixup logic */
6172
#define POISON_CALL_KFUNC_BASE 2002000000
6173
#define POISON_CALL_KFUNC_PFX "2002"
6174
6175
static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
6176
int insn_idx, struct bpf_insn *insn,
6177
int ext_idx, const struct extern_desc *ext)
6178
{
6179
pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
6180
prog->name, relo_idx, insn_idx, ext->name);
6181
6182
/* we turn kfunc call into invalid helper call with identifiable constant */
6183
insn->code = BPF_JMP | BPF_CALL;
6184
insn->dst_reg = 0;
6185
insn->src_reg = 0;
6186
insn->off = 0;
6187
/* if this instruction is reachable (not a dead code),
6188
* verifier will complain with something like:
6189
* invalid func unknown#2001000123
6190
* where lower 123 is extern index into obj->externs[] array
6191
*/
6192
insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
6193
}
6194
6195
static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off)
6196
{
6197
size_t i;
6198
6199
for (i = 0; i < obj->jumptable_map_cnt; i++) {
6200
/*
6201
* It might happen that the same offset is used for two different
6202
* programs (as jump tables can be the same). However, for
6203
* different programs, different maps should be created.
6204
*/
6205
if (obj->jumptable_maps[i].sym_off == sym_off &&
6206
obj->jumptable_maps[i].prog == prog)
6207
return obj->jumptable_maps[i].fd;
6208
}
6209
6210
return -ENOENT;
6211
}
6212
6213
static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd)
6214
{
6215
size_t cnt = obj->jumptable_map_cnt;
6216
size_t size = sizeof(obj->jumptable_maps[0]);
6217
void *tmp;
6218
6219
tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size);
6220
if (!tmp)
6221
return -ENOMEM;
6222
6223
obj->jumptable_maps = tmp;
6224
obj->jumptable_maps[cnt].prog = prog;
6225
obj->jumptable_maps[cnt].sym_off = sym_off;
6226
obj->jumptable_maps[cnt].fd = map_fd;
6227
obj->jumptable_map_cnt++;
6228
6229
return 0;
6230
}
6231
6232
static int find_subprog_idx(struct bpf_program *prog, int insn_idx)
6233
{
6234
int i;
6235
6236
for (i = prog->subprog_cnt - 1; i >= 0; i--) {
6237
if (insn_idx >= prog->subprogs[i].sub_insn_off)
6238
return i;
6239
}
6240
6241
return -1;
6242
}
6243
6244
static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo)
6245
{
6246
const __u32 jt_entry_size = 8;
6247
int sym_off = relo->sym_off;
6248
int jt_size = relo->sym_size;
6249
__u32 max_entries = jt_size / jt_entry_size;
6250
__u32 value_size = sizeof(struct bpf_insn_array_value);
6251
struct bpf_insn_array_value val = {};
6252
int subprog_idx;
6253
int map_fd, err;
6254
__u64 insn_off;
6255
__u64 *jt;
6256
__u32 i;
6257
6258
map_fd = find_jt_map(obj, prog, sym_off);
6259
if (map_fd >= 0)
6260
return map_fd;
6261
6262
if (sym_off % jt_entry_size) {
6263
pr_warn("map '.jumptables': jumptable start %d should be multiple of %u\n",
6264
sym_off, jt_entry_size);
6265
return -EINVAL;
6266
}
6267
6268
if (jt_size % jt_entry_size) {
6269
pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n",
6270
jt_size, jt_entry_size);
6271
return -EINVAL;
6272
}
6273
6274
map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables",
6275
4, value_size, max_entries, NULL);
6276
if (map_fd < 0)
6277
return map_fd;
6278
6279
if (!obj->jumptables_data) {
6280
pr_warn("map '.jumptables': ELF file is missing jump table data\n");
6281
err = -EINVAL;
6282
goto err_close;
6283
}
6284
if (sym_off + jt_size > obj->jumptables_data_sz) {
6285
pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n",
6286
obj->jumptables_data_sz, sym_off + jt_size);
6287
err = -EINVAL;
6288
goto err_close;
6289
}
6290
6291
subprog_idx = -1; /* main program */
6292
if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) {
6293
pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx);
6294
err = -EINVAL;
6295
goto err_close;
6296
}
6297
if (prog->subprogs)
6298
subprog_idx = find_subprog_idx(prog, relo->insn_idx);
6299
6300
jt = (__u64 *)(obj->jumptables_data + sym_off);
6301
for (i = 0; i < max_entries; i++) {
6302
/*
6303
* The offset should be made relative to the beginning of
6304
* the main function, not the subfunction.
6305
*/
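/*
 * Example with made-up numbers: if jt[i] is 800 bytes, the raw target is
 * insn 100 of the ELF section; for a subprog starting at section insn 96
 * that was appended at insn 250 of the main program, the stored offset
 * becomes 100 - 96 + 250 = 254.
 */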
6306
insn_off = jt[i] / sizeof(struct bpf_insn);
6307
if (subprog_idx >= 0) {
6308
insn_off -= prog->subprogs[subprog_idx].sec_insn_off;
6309
insn_off += prog->subprogs[subprog_idx].sub_insn_off;
6310
} else {
6311
insn_off -= prog->sec_insn_off;
6312
}
6313
6314
/*
6315
* LLVM-generated jump tables contain u64 records; however, they
6316
* should contain values that fit in u32.
6317
*/
6318
if (insn_off > UINT32_MAX) {
6319
pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %d\n",
6320
(long long)jt[i], sym_off + i * jt_entry_size);
6321
err = -EINVAL;
6322
goto err_close;
6323
}
6324
6325
val.orig_off = insn_off;
6326
err = bpf_map_update_elem(map_fd, &i, &val, 0);
6327
if (err)
6328
goto err_close;
6329
}
6330
6331
err = bpf_map_freeze(map_fd);
6332
if (err)
6333
goto err_close;
6334
6335
err = add_jt_map(obj, prog, sym_off, map_fd);
6336
if (err)
6337
goto err_close;
6338
6339
return map_fd;
6340
6341
err_close:
6342
close(map_fd);
6343
return err;
6344
}
6345
6346
/* Relocate data references within program code:
6347
* - map references;
6348
* - global variable references;
6349
* - extern references.
6350
*/
6351
static int
6352
bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
6353
{
6354
int i;
6355
6356
for (i = 0; i < prog->nr_reloc; i++) {
6357
struct reloc_desc *relo = &prog->reloc_desc[i];
6358
struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6359
const struct bpf_map *map;
6360
struct extern_desc *ext;
6361
6362
switch (relo->type) {
6363
case RELO_LD64:
6364
map = &obj->maps[relo->map_idx];
6365
if (obj->gen_loader) {
6366
insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
6367
insn[0].imm = relo->map_idx;
6368
} else if (map->autocreate) {
6369
insn[0].src_reg = BPF_PSEUDO_MAP_FD;
6370
insn[0].imm = map->fd;
6371
} else {
6372
poison_map_ldimm64(prog, i, relo->insn_idx, insn,
6373
relo->map_idx, map);
6374
}
6375
break;
6376
case RELO_DATA:
6377
map = &obj->maps[relo->map_idx];
6378
insn[1].imm = insn[0].imm + relo->sym_off;
6379
if (obj->gen_loader) {
6380
insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6381
insn[0].imm = relo->map_idx;
6382
} else if (map->autocreate) {
6383
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6384
insn[0].imm = map->fd;
6385
} else {
6386
poison_map_ldimm64(prog, i, relo->insn_idx, insn,
6387
relo->map_idx, map);
6388
}
6389
break;
6390
case RELO_EXTERN_LD64:
6391
ext = &obj->externs[relo->ext_idx];
6392
if (ext->type == EXT_KCFG) {
6393
if (obj->gen_loader) {
6394
insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6395
insn[0].imm = obj->kconfig_map_idx;
6396
} else {
6397
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6398
insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6399
}
6400
insn[1].imm = ext->kcfg.data_off;
6401
} else /* EXT_KSYM */ {
6402
if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
6403
insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6404
insn[0].imm = ext->ksym.kernel_btf_id;
6405
insn[1].imm = ext->ksym.kernel_btf_obj_fd;
6406
} else { /* typeless ksyms or unresolved typed ksyms */
6407
insn[0].imm = (__u32)ext->ksym.addr;
6408
insn[1].imm = ext->ksym.addr >> 32;
6409
}
6410
}
6411
break;
6412
case RELO_EXTERN_CALL:
6413
ext = &obj->externs[relo->ext_idx];
6414
insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
6415
if (ext->is_set) {
6416
insn[0].imm = ext->ksym.kernel_btf_id;
6417
insn[0].off = ext->ksym.btf_fd_idx;
6418
} else { /* unresolved weak kfunc call */
6419
poison_kfunc_call(prog, i, relo->insn_idx, insn,
6420
relo->ext_idx, ext);
6421
}
6422
break;
6423
case RELO_SUBPROG_ADDR:
6424
if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
6425
pr_warn("prog '%s': relo #%d: bad insn\n",
6426
prog->name, i);
6427
return -EINVAL;
6428
}
6429
/* handled already */
6430
break;
6431
case RELO_CALL:
6432
/* handled already */
6433
break;
6434
case RELO_CORE:
6435
/* will be handled by bpf_program_record_relos() */
6436
break;
6437
case RELO_INSN_ARRAY: {
6438
int map_fd;
6439
6440
map_fd = create_jt_map(obj, prog, relo);
6441
if (map_fd < 0) {
6442
pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n",
6443
prog->name, i, relo->sym_off);
6444
return map_fd;
6445
}
6446
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6447
insn->imm = map_fd;
6448
insn->off = 0;
6449
}
6450
break;
6451
default:
6452
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6453
prog->name, i, relo->type);
6454
return -EINVAL;
6455
}
6456
}
6457
6458
return 0;
6459
}
6460
6461
static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6462
const struct bpf_program *prog,
6463
const struct btf_ext_info *ext_info,
6464
void **prog_info, __u32 *prog_rec_cnt,
6465
__u32 *prog_rec_sz)
6466
{
6467
void *copy_start = NULL, *copy_end = NULL;
6468
void *rec, *rec_end, *new_prog_info;
6469
const struct btf_ext_info_sec *sec;
6470
size_t old_sz, new_sz;
6471
int i, sec_num, sec_idx, off_adj;
6472
6473
sec_num = 0;
6474
for_each_btf_ext_sec(ext_info, sec) {
6475
sec_idx = ext_info->sec_idxs[sec_num];
6476
sec_num++;
6477
if (prog->sec_idx != sec_idx)
6478
continue;
6479
6480
for_each_btf_ext_rec(ext_info, sec, i, rec) {
6481
__u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6482
6483
if (insn_off < prog->sec_insn_off)
6484
continue;
6485
if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6486
break;
6487
6488
if (!copy_start)
6489
copy_start = rec;
6490
copy_end = rec + ext_info->rec_size;
6491
}
6492
6493
if (!copy_start)
6494
return -ENOENT;
6495
6496
/* append func/line info of a given (sub-)program to the main
6497
* program func/line info
6498
*/
6499
old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6500
new_sz = old_sz + (copy_end - copy_start);
6501
new_prog_info = realloc(*prog_info, new_sz);
6502
if (!new_prog_info)
6503
return -ENOMEM;
6504
*prog_info = new_prog_info;
6505
*prog_rec_cnt = new_sz / ext_info->rec_size;
6506
memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6507
6508
/* Kernel instruction offsets are in units of 8-byte
6509
* instructions, while .BTF.ext instruction offsets generated
6510
* by Clang are in units of bytes. So convert Clang offsets
6511
* into kernel offsets and adjust the offset according to the program's
6512
* relocated position.
6513
*/
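/* E.g., with made-up numbers: a record at byte offset 824 is section insn
 * 103; for a subprog with sec_insn_off == 100 appended at sub_insn_off == 5,
 * off_adj is -95 and the adjusted offset is 103 - 95 == 8, i.e. the 4th
 * instruction of the appended subprog copy.
 */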
6514
off_adj = prog->sub_insn_off - prog->sec_insn_off;
6515
rec = new_prog_info + old_sz;
6516
rec_end = new_prog_info + new_sz;
6517
for (; rec < rec_end; rec += ext_info->rec_size) {
6518
__u32 *insn_off = rec;
6519
6520
*insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6521
}
6522
*prog_rec_sz = ext_info->rec_size;
6523
return 0;
6524
}
6525
6526
return -ENOENT;
6527
}
6528
6529
static int
6530
reloc_prog_func_and_line_info(const struct bpf_object *obj,
6531
struct bpf_program *main_prog,
6532
const struct bpf_program *prog)
6533
{
6534
int err;
6535
6536
/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6537
* support func/line info
6538
*/
6539
if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
6540
return 0;
6541
6542
/* only attempt func info relocation if main program's func_info
6543
* relocation was successful
6544
*/
6545
if (main_prog != prog && !main_prog->func_info)
6546
goto line_info;
6547
6548
err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6549
&main_prog->func_info,
6550
&main_prog->func_info_cnt,
6551
&main_prog->func_info_rec_size);
6552
if (err) {
6553
if (err != -ENOENT) {
6554
pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n",
6555
prog->name, errstr(err));
6556
return err;
6557
}
6558
if (main_prog->func_info) {
6559
/*
6560
* Some info has already been found, but there is a problem
6561
* with the last btf_ext reloc, so we must error out.
6562
*/
6563
pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6564
return err;
6565
}
6566
/* There is a problem loading the very first info. Ignore the rest. */
6567
pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6568
prog->name);
6569
}
6570
6571
line_info:
6572
/* don't relocate line info if main program's relocation failed */
6573
if (main_prog != prog && !main_prog->line_info)
6574
return 0;
6575
6576
err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6577
&main_prog->line_info,
6578
&main_prog->line_info_cnt,
6579
&main_prog->line_info_rec_size);
6580
if (err) {
6581
if (err != -ENOENT) {
6582
pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n",
6583
prog->name, errstr(err));
6584
return err;
6585
}
6586
if (main_prog->line_info) {
6587
/*
6588
* Some info has already been found, but there is a problem
6589
* with the last btf_ext reloc, so we must error out.
6590
*/
6591
pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6592
return err;
6593
}
6594
/* There is a problem loading the very first info. Ignore the rest. */
6595
pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6596
prog->name);
6597
}
6598
return 0;
6599
}
6600
6601
static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6602
{
6603
size_t insn_idx = *(const size_t *)key;
6604
const struct reloc_desc *relo = elem;
6605
6606
if (insn_idx == relo->insn_idx)
6607
return 0;
6608
return insn_idx < relo->insn_idx ? -1 : 1;
6609
}
6610
6611
static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6612
{
6613
if (!prog->nr_reloc)
6614
return NULL;
6615
return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6616
sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6617
}
6618
6619
static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6620
{
6621
int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6622
struct reloc_desc *relos;
6623
int i;
6624
6625
if (main_prog == subprog)
6626
return 0;
6627
relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6628
/* if new count is zero, reallocarray can return a valid NULL result;
6629
* in this case the previous pointer will be freed, so we *have to*
6630
* reassign old pointer to the new value (even if it's NULL)
6631
*/
6632
if (!relos && new_cnt)
6633
return -ENOMEM;
6634
if (subprog->nr_reloc)
6635
memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6636
sizeof(*relos) * subprog->nr_reloc);
6637
6638
for (i = main_prog->nr_reloc; i < new_cnt; i++)
6639
relos[i].insn_idx += subprog->sub_insn_off;
6640
/* After insn_idx adjustment the 'relos' array is still sorted
6641
* by insn_idx and doesn't break bsearch.
6642
*/
6643
main_prog->reloc_desc = relos;
6644
main_prog->nr_reloc = new_cnt;
6645
return 0;
6646
}
6647
6648
static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog)
6649
{
6650
size_t size = sizeof(main_prog->subprogs[0]);
6651
int cnt = main_prog->subprog_cnt;
6652
void *tmp;
6653
6654
tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size);
6655
if (!tmp)
6656
return -ENOMEM;
6657
6658
main_prog->subprogs = tmp;
6659
main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off;
6660
main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off;
6661
main_prog->subprog_cnt++;
6662
6663
return 0;
6664
}
6665
6666
static int
6667
bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
6668
struct bpf_program *subprog)
6669
{
6670
struct bpf_insn *insns;
6671
size_t new_cnt;
6672
int err;
6673
6674
subprog->sub_insn_off = main_prog->insns_cnt;
6675
6676
new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6677
insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6678
if (!insns) {
6679
pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6680
return -ENOMEM;
6681
}
6682
main_prog->insns = insns;
6683
main_prog->insns_cnt = new_cnt;
6684
6685
memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6686
subprog->insns_cnt * sizeof(*insns));
6687
6688
pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6689
main_prog->name, subprog->insns_cnt, subprog->name);
6690
6691
/* The subprog insns are now appended. Append its relos too. */
6692
err = append_subprog_relos(main_prog, subprog);
6693
if (err)
6694
return err;
6695
6696
err = save_subprog_offsets(main_prog, subprog);
6697
if (err) {
6698
pr_warn("prog '%s': failed to add subprog offsets: %s\n",
6699
main_prog->name, errstr(err));
6700
return err;
6701
}
6702
6703
return 0;
6704
}
6705
6706
static int
6707
bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6708
struct bpf_program *prog)
6709
{
6710
size_t sub_insn_idx, insn_idx;
6711
struct bpf_program *subprog;
6712
struct reloc_desc *relo;
6713
struct bpf_insn *insn;
6714
int err;
6715
6716
err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6717
if (err)
6718
return err;
6719
6720
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6721
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6722
if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6723
continue;
6724
6725
relo = find_prog_insn_relo(prog, insn_idx);
6726
if (relo && relo->type == RELO_EXTERN_CALL)
6727
/* kfunc relocations will be handled later
6728
* in bpf_object__relocate_data()
6729
*/
6730
continue;
6731
if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6732
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6733
prog->name, insn_idx, relo->type);
6734
return -LIBBPF_ERRNO__RELOC;
6735
}
6736
if (relo) {
6737
/* sub-program instruction index is a combination of
6738
* an offset of a symbol pointed to by relocation and
6739
* call instruction's imm field; for global functions,
6740
* call always has imm = -1, but for static functions
6741
* relocation is against STT_SECTION and insn->imm
6742
* points to a start of a static function
6743
*
6744
* for subprog addr relocation, the relo->sym_off + insn->imm is
6745
* the byte offset in the corresponding section.
6746
*/
6747
if (relo->type == RELO_CALL)
6748
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6749
else
6750
sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6751
} else if (insn_is_pseudo_func(insn)) {
6752
/*
6753
* RELO_SUBPROG_ADDR relo is always emitted even if both
6754
* functions are in the same section, so it shouldn't reach here.
6755
*/
6756
pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6757
prog->name, insn_idx);
6758
return -LIBBPF_ERRNO__RELOC;
6759
} else {
6760
/* if subprogram call is to a static function within
6761
* the same ELF section, there won't be any relocation
6762
* emitted, but it also means there is no additional
6763
* offset necessary, insns->imm is relative to
6764
* instruction's original position within the section
6765
*/
6766
sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6767
}
6768
6769
/* we enforce that sub-programs should be in .text section */
6770
subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6771
if (!subprog) {
6772
pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6773
prog->name);
6774
return -LIBBPF_ERRNO__RELOC;
6775
}
6776
6777
/* if it's the first call instruction calling into this
6778
* subprogram (meaning this subprog hasn't been processed
6779
* yet) within the context of current main program:
6780
* - append it at the end of the main program's instruction block;
6781
* - process it recursively, while the current program is put on hold;
6782
* - if that subprogram calls some other not yet processed
6783
* subprogram, same thing will happen recursively until
6784
* there are no more unprocessed subprograms left to append
6785
* and relocate.
6786
*/
6787
if (subprog->sub_insn_off == 0) {
6788
err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6789
if (err)
6790
return err;
6791
err = bpf_object__reloc_code(obj, main_prog, subprog);
6792
if (err)
6793
return err;
6794
}
6795
6796
/* main_prog->insns memory could have been re-allocated, so
6797
* recalculate the pointer
6798
*/
6799
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6800
/* calculate correct instruction position within current main
6801
* prog; each main prog can have a different set of
6802
* subprograms appended (potentially in different order as
6803
* well), so position of any subprog can be different for
6804
* different main programs
6805
*/
6806
insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6807
6808
pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6809
prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6810
}
6811
6812
return 0;
6813
}
6814
6815
/*
6816
* Relocate sub-program calls.
6817
*
6818
* Algorithm operates as follows. Each entry-point BPF program (referred to as
6819
* main prog) is processed separately. Each subprog (a non-entry function
6820
* that can be called from either entry progs or other subprogs) gets its
6821
* sub_insn_off reset to zero. This serves as an indicator that this subprogram
6822
* hasn't yet been appended and relocated within the current main prog. Once it's
6823
* relocated, sub_insn_off will point at the position within current main prog
6824
* where given subprog was appended. This will further be used to relocate all
6825
* the call instructions jumping into this subprog.
6826
*
6827
* We start with main program and process all call instructions. If the call
6828
* is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6829
* is zero), subprog instructions are appended at the end of main program's
6830
* instruction array. Then main program is "put on hold" while we recursively
6831
* process newly appended subprogram. If that subprogram calls into another
6832
* subprogram that hasn't been appended, new subprogram is appended again to
6833
* the *main* prog's instructions (subprog's instructions are always left
6834
* untouched, as they need to be in unmodified state for subsequent main progs
6835
* and subprog instructions are always sent only as part of a main prog) and
6836
* the process continues recursively. Once all the subprogs called from a main
6837
* prog or any of its subprogs are appended (and relocated), all their
6838
* positions within finalized instructions array are known, so it's easy to
6839
* rewrite call instructions with correct relative offsets, corresponding to
6840
* desired target subprog.
6841
*
6842
* It's important to realize that some subprogs might not be called from some
6843
* main prog or any of its called/used subprogs. Those will keep their
6844
* subprog->sub_insn_off as zero at all times and won't be appended to current
6845
* main prog and won't be relocated within the context of current main prog.
6846
* They might still be used from other main progs later.
6847
*
6848
* Visually this process can be shown as below. Suppose we have two main
6849
* programs mainA and mainB and BPF object contains three subprogs: subA,
6850
* subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6851
* subC both call subB:
6852
*
6853
* +--------+ +-------+
6854
* | v v |
6855
* +--+---+ +--+-+-+ +---+--+
6856
* | subA | | subB | | subC |
6857
* +--+---+ +------+ +---+--+
6858
* ^ ^
6859
* | |
6860
* +---+-------+ +------+----+
6861
* | mainA | | mainB |
6862
* +-----------+ +-----------+
6863
*
6864
* We'll start relocating mainA, will find subA, append it and start
6865
* processing subA recursively:
6866
*
6867
* +-----------+------+
6868
* | mainA | subA |
6869
* +-----------+------+
6870
*
6871
* At this point we notice that subB is used from subA, so we append it and
6872
* relocate (there are no further subcalls from subB):
6873
*
6874
* +-----------+------+------+
6875
* | mainA | subA | subB |
6876
* +-----------+------+------+
6877
*
6878
* At this point, we relocate subA calls, then go one level up and finish with
6879
* relocating mainA calls. mainA is done.
6880
*
6881
* For mainB the process is similar but results in a different order. We start with
6882
* mainB and skip subA and subB, as mainB never calls them (at least
6883
* directly), but we see subC is needed, so we append and start processing it:
6884
*
6885
* +-----------+------+
6886
* | mainB | subC |
6887
* +-----------+------+
6888
* Now we see subC needs subB, so we go back to it, append and relocate it:
6889
*
6890
* +-----------+------+------+
6891
* | mainB | subC | subB |
6892
* +-----------+------+------+
6893
*
6894
* At this point we unwind recursion, relocate calls in subC, then in mainB.
6895
*/
6896
static int
6897
bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6898
{
6899
struct bpf_program *subprog;
6900
int i, err;
6901
6902
/* mark all subprogs as not relocated (yet) within the context of
6903
* current main program
6904
*/
6905
for (i = 0; i < obj->nr_programs; i++) {
6906
subprog = &obj->programs[i];
6907
if (!prog_is_subprog(obj, subprog))
6908
continue;
6909
6910
subprog->sub_insn_off = 0;
6911
}
6912
6913
err = bpf_object__reloc_code(obj, prog, prog);
6914
if (err)
6915
return err;
6916
6917
return 0;
6918
}
6919
6920
static void
6921
bpf_object__free_relocs(struct bpf_object *obj)
6922
{
6923
struct bpf_program *prog;
6924
int i;
6925
6926
/* free up relocation descriptors */
6927
for (i = 0; i < obj->nr_programs; i++) {
6928
prog = &obj->programs[i];
6929
zfree(&prog->reloc_desc);
6930
prog->nr_reloc = 0;
6931
}
6932
}
6933
6934
static int cmp_relocs(const void *_a, const void *_b)
6935
{
6936
const struct reloc_desc *a = _a;
6937
const struct reloc_desc *b = _b;
6938
6939
if (a->insn_idx != b->insn_idx)
6940
return a->insn_idx < b->insn_idx ? -1 : 1;
6941
6942
/* no two relocations should have the same insn_idx, but ... */
6943
if (a->type != b->type)
6944
return a->type < b->type ? -1 : 1;
6945
6946
return 0;
6947
}
6948
6949
static void bpf_object__sort_relos(struct bpf_object *obj)
6950
{
6951
int i;
6952
6953
for (i = 0; i < obj->nr_programs; i++) {
6954
struct bpf_program *p = &obj->programs[i];
6955
6956
if (!p->nr_reloc)
6957
continue;
6958
6959
qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6960
}
6961
}
6962
6963
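/* Look for an "exception_callback:<name>" decl tag attached to this main
 * program's FUNC and, if present, remember the index of the global subprog
 * <name> so it can later be appended as the program's exception callback.
 * E.g., a tag value of "exception_callback:my_cb" (my_cb being a global
 * subprog in the same object) marks my_cb as this program's exception
 * callback.
 */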
static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
6964
{
6965
const char *str = "exception_callback:";
6966
size_t pfx_len = strlen(str);
6967
int i, j, n;
6968
6969
if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
6970
return 0;
6971
6972
n = btf__type_cnt(obj->btf);
6973
for (i = 1; i < n; i++) {
6974
const char *name;
6975
struct btf_type *t;
6976
6977
t = btf_type_by_id(obj->btf, i);
6978
if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
6979
continue;
6980
6981
name = btf__str_by_offset(obj->btf, t->name_off);
6982
if (strncmp(name, str, pfx_len) != 0)
6983
continue;
6984
6985
t = btf_type_by_id(obj->btf, t->type);
6986
if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
6987
pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
6988
prog->name);
6989
return -EINVAL;
6990
}
6991
if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
6992
continue;
6993
/* Multiple callbacks are specified for the same prog,
6994
* the verifier will eventually return an error for this
6995
* case, hence simply skip appending a subprog.
6996
*/
6997
if (prog->exception_cb_idx >= 0) {
6998
prog->exception_cb_idx = -1;
6999
break;
7000
}
7001
7002
name += pfx_len;
7003
if (str_is_empty(name)) {
7004
pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
7005
prog->name);
7006
return -EINVAL;
7007
}
7008
7009
for (j = 0; j < obj->nr_programs; j++) {
7010
struct bpf_program *subprog = &obj->programs[j];
7011
7012
if (!prog_is_subprog(obj, subprog))
7013
continue;
7014
if (strcmp(name, subprog->name) != 0)
7015
continue;
7016
/* Enforce non-hidden, as from the verifier's point of
7017
* view it expects global functions, whereas
7018
* mark_btf_static fixes up linkage as static.
7019
*/
7020
if (!subprog->sym_global || subprog->mark_btf_static) {
7021
pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
7022
prog->name, subprog->name);
7023
return -EINVAL;
7024
}
7025
/* Let's see if we already saw a static exception callback with the same name */
7026
if (prog->exception_cb_idx >= 0) {
7027
pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
7028
prog->name, subprog->name);
7029
return -EINVAL;
7030
}
7031
prog->exception_cb_idx = j;
7032
break;
7033
}
7034
7035
if (prog->exception_cb_idx >= 0)
7036
continue;
7037
7038
pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
7039
return -ENOENT;
7040
}
7041
7042
return 0;
7043
}
7044
7045
static struct {
7046
enum bpf_prog_type prog_type;
7047
const char *ctx_name;
7048
} global_ctx_map[] = {
7049
{ BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" },
7050
{ BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" },
7051
{ BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" },
7052
{ BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" },
7053
{ BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" },
7054
{ BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" },
7055
{ BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" },
7056
{ BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" },
7057
{ BPF_PROG_TYPE_LWT_IN, "__sk_buff" },
7058
{ BPF_PROG_TYPE_LWT_OUT, "__sk_buff" },
7059
{ BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" },
7060
{ BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" },
7061
{ BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" },
7062
{ BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" },
7063
{ BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" },
7064
{ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
7065
{ BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" },
7066
{ BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" },
7067
{ BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" },
7068
{ BPF_PROG_TYPE_SK_MSG, "sk_msg_md" },
7069
{ BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" },
7070
{ BPF_PROG_TYPE_SK_SKB, "__sk_buff" },
7071
{ BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" },
7072
{ BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" },
7073
{ BPF_PROG_TYPE_XDP, "xdp_md" },
7074
/* all other program types don't have "named" context structs */
7075
};
7076
7077
/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
7078
* for below __builtin_types_compatible_p() checks;
7079
* with this approach we don't need any extra arch-specific #ifdef guards
7080
*/
7081
struct pt_regs;
7082
struct user_pt_regs;
7083
struct user_regs_struct;
7084
7085
static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
7086
const char *subprog_name, int arg_idx,
7087
int arg_type_id, const char *ctx_name)
7088
{
7089
const struct btf_type *t;
7090
const char *tname;
7091
7092
/* check if existing parameter already matches verifier expectations */
7093
t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
7094
if (!btf_is_ptr(t))
7095
goto out_warn;
7096
7097
/* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
7098
* and perf_event programs, so check this case early on and forget
7099
* about it for subsequent checks
7100
*/
7101
while (btf_is_mod(t))
7102
t = btf__type_by_id(btf, t->type);
7103
if (btf_is_typedef(t) &&
7104
(prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
7105
tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
7106
if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
7107
return false; /* canonical type for kprobe/perf_event */
7108
}
7109
7110
/* now we can ignore typedefs moving forward */
7111
t = skip_mods_and_typedefs(btf, t->type, NULL);
7112
7113
/* if it's `void *`, definitely fix up BTF info */
7114
if (btf_is_void(t))
7115
return true;
7116
7117
/* if it's already proper canonical type, no need to fix up */
7118
tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
7119
if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
7120
return false;
7121
7122
/* special cases */
7123
switch (prog->type) {
7124
case BPF_PROG_TYPE_KPROBE:
7125
/* `struct pt_regs *` is expected, but we need to fix up */
7126
if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
7127
return true;
7128
break;
7129
case BPF_PROG_TYPE_PERF_EVENT:
7130
if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
7131
btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
7132
return true;
7133
if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
7134
btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
7135
return true;
7136
if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
7137
btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
7138
return true;
7139
break;
7140
case BPF_PROG_TYPE_RAW_TRACEPOINT:
7141
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
7142
/* allow u64* as ctx */
7143
if (btf_is_int(t) && t->size == 8)
7144
return true;
7145
break;
7146
default:
7147
break;
7148
}
7149
7150
out_warn:
7151
pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
7152
prog->name, subprog_name, arg_idx, ctx_name);
7153
return false;
7154
}
7155
7156
static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
7157
{
7158
int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
7159
int i, err, arg_cnt, fn_name_off, linkage;
7160
struct btf_type *fn_t, *fn_proto_t, *t;
7161
struct btf_param *p;
7162
7163
/* caller already validated FUNC -> FUNC_PROTO validity */
7164
fn_t = btf_type_by_id(btf, orig_fn_id);
7165
fn_proto_t = btf_type_by_id(btf, fn_t->type);
7166
7167
/* Note that each btf__add_xxx() operation invalidates
7168
* all btf_type and string pointers, so we need to be
7169
* very careful when cloning BTF types. BTF type
7170
* pointers always have to be refetched. And to avoid
7171
* problems with invalidated string pointers, we
7172
* add empty strings initially, then just fix up
7173
* name_off offsets in place. Offsets are stable for
7174
* existing strings, so that works out.
7175
*/
7176
fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
7177
linkage = btf_func_linkage(fn_t);
7178
orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
7179
ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
7180
arg_cnt = btf_vlen(fn_proto_t);
7181
7182
/* clone FUNC_PROTO and its params */
7183
fn_proto_id = btf__add_func_proto(btf, ret_type_id);
7184
if (fn_proto_id < 0)
7185
return -EINVAL;
7186
7187
for (i = 0; i < arg_cnt; i++) {
7188
int name_off;
7189
7190
/* copy original parameter data */
7191
t = btf_type_by_id(btf, orig_proto_id);
7192
p = &btf_params(t)[i];
7193
name_off = p->name_off;
7194
7195
err = btf__add_func_param(btf, "", p->type);
7196
if (err)
7197
return err;
7198
7199
fn_proto_t = btf_type_by_id(btf, fn_proto_id);
7200
p = &btf_params(fn_proto_t)[i];
7201
p->name_off = name_off; /* use remembered str offset */
7202
}
7203
7204
/* clone FUNC now, btf__add_func() enforces non-empty name, so use
7205
* entry program's name as a placeholder, which we replace immediately
7206
* with original name_off
7207
*/
7208
fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
7209
if (fn_id < 0)
7210
return -EINVAL;
7211
7212
fn_t = btf_type_by_id(btf, fn_id);
7213
fn_t->name_off = fn_name_off; /* reuse original string */
7214
7215
return fn_id;
7216
}
7217
7218
/* Check if main program or global subprog's function prototype has `arg:ctx`
7219
* argument tags, and, if necessary, substitute correct type to match what BPF
7220
* verifier would expect, taking into account specific program type. This
7221
* allows supporting the __arg_ctx tag transparently on old kernels that don't yet
7222
* have native support for it in the verifier, making the user's life much
7223
* easier.
7224
*/
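/* As an illustration: in an SK_MSG program, a global subprog argument tagged
 * with "arg:ctx" but typed as `void *` gets its BTF rewritten to point to
 * `struct sk_msg_md *`, which is what the verifier expects as the context
 * type for that program type (see global_ctx_map above).
 */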
7225
static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
7226
{
7227
const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
7228
struct bpf_func_info_min *func_rec;
7229
struct btf_type *fn_t, *fn_proto_t;
7230
struct btf *btf = obj->btf;
7231
const struct btf_type *t;
7232
struct btf_param *p;
7233
int ptr_id = 0, struct_id, tag_id, orig_fn_id;
7234
int i, n, arg_idx, arg_cnt, err, rec_idx;
7235
int *orig_ids;
7236
7237
/* no .BTF.ext, no problem */
7238
if (!obj->btf_ext || !prog->func_info)
7239
return 0;
7240
7241
/* don't do any fix ups if kernel natively supports __arg_ctx */
7242
if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
7243
return 0;
7244
7245
/* some BPF program types just don't have named context structs, so
7246
* this fallback mechanism doesn't work for them
7247
*/
7248
for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
7249
if (global_ctx_map[i].prog_type != prog->type)
7250
continue;
7251
ctx_name = global_ctx_map[i].ctx_name;
7252
break;
7253
}
7254
if (!ctx_name)
7255
return 0;
7256
7257
/* remember original func BTF IDs to detect if we already cloned them */
7258
orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
7259
if (!orig_ids)
7260
return -ENOMEM;
7261
for (i = 0; i < prog->func_info_cnt; i++) {
7262
func_rec = prog->func_info + prog->func_info_rec_size * i;
7263
orig_ids[i] = func_rec->type_id;
7264
}
7265
7266
/* go through each DECL_TAG with "arg:ctx" and see if it points to one
7267
* of our subprogs; if yes and subprog is global and needs adjustment,
7268
* clone and adjust FUNC -> FUNC_PROTO combo
7269
*/
7270
for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
7271
/* only DECL_TAG with "arg:ctx" value are interesting */
7272
t = btf__type_by_id(btf, i);
7273
if (!btf_is_decl_tag(t))
7274
continue;
7275
if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
7276
continue;
7277
7278
/* only global funcs need adjustment, if at all */
7279
orig_fn_id = t->type;
7280
fn_t = btf_type_by_id(btf, orig_fn_id);
7281
if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
7282
continue;
7283
7284
/* sanity check FUNC -> FUNC_PROTO chain, just in case */
7285
fn_proto_t = btf_type_by_id(btf, fn_t->type);
7286
if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
7287
continue;
7288
7289
/* find corresponding func_info record */
7290
func_rec = NULL;
7291
for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
7292
if (orig_ids[rec_idx] == t->type) {
7293
func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
7294
break;
7295
}
7296
}
7297
/* current main program doesn't call into this subprog */
7298
if (!func_rec)
7299
continue;
7300
7301
/* some more sanity checking of DECL_TAG */
7302
arg_cnt = btf_vlen(fn_proto_t);
7303
arg_idx = btf_decl_tag(t)->component_idx;
7304
if (arg_idx < 0 || arg_idx >= arg_cnt)
7305
continue;
7306
7307
/* check if we should fix up argument type */
7308
p = &btf_params(fn_proto_t)[arg_idx];
7309
fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
7310
if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
7311
continue;
7312
7313
/* clone fn/fn_proto, unless we already did it for another arg */
7314
if (func_rec->type_id == orig_fn_id) {
7315
int fn_id;
7316
7317
fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
7318
if (fn_id < 0) {
7319
err = fn_id;
7320
goto err_out;
7321
}
7322
7323
/* point func_info record to a cloned FUNC type */
7324
func_rec->type_id = fn_id;
7325
}
7326
7327
/* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
7328
* we do it just once per main BPF program, as all global
7329
* funcs share the same program type, so we need only one PTR ->
7330
* STRUCT type chain
7331
*/
7332
if (ptr_id == 0) {
7333
struct_id = btf__add_struct(btf, ctx_name, 0);
7334
ptr_id = btf__add_ptr(btf, struct_id);
7335
if (ptr_id < 0 || struct_id < 0) {
7336
err = -EINVAL;
7337
goto err_out;
7338
}
7339
}
7340
7341
/* for completeness, clone DECL_TAG and point it to cloned param */
7342
tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
7343
if (tag_id < 0) {
7344
err = -EINVAL;
7345
goto err_out;
7346
}
7347
7348
/* all the BTF manipulations invalidated pointers, refetch them */
7349
fn_t = btf_type_by_id(btf, func_rec->type_id);
7350
fn_proto_t = btf_type_by_id(btf, fn_t->type);
7351
7352
/* fix up type ID pointed to by param */
7353
p = &btf_params(fn_proto_t)[arg_idx];
7354
p->type = ptr_id;
7355
}
7356
7357
free(orig_ids);
7358
return 0;
7359
err_out:
7360
free(orig_ids);
7361
return err;
7362
}
7363
7364
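/* Illustrative sketch (not part of libbpf itself): the kind of BPF-side code
 * that the fixup above emulates on older kernels. A global subprog tags its
 * context parameter with __arg_ctx (a btf_decl_tag("arg:ctx") wrapper from
 * bpf_helpers.h), and libbpf rewrites that parameter's BTF into the canonical
 * PTR -> STRUCT context type for the program type. Function and section names
 * below are hypothetical example code:
 *
 *	__noinline int handle_event(void *ctx __arg_ctx)
 *	{
 *		return bpf_get_smp_processor_id();
 *	}
 *
 *	SEC("perf_event")
 *	int prog(struct bpf_perf_event_data *ctx)
 *	{
 *		return handle_event(ctx);
 *	}
 */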
static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
7365
{
7366
struct bpf_program *prog;
7367
size_t i, j;
7368
int err;
7369
7370
if (obj->btf_ext) {
7371
err = bpf_object__relocate_core(obj, targ_btf_path);
7372
if (err) {
7373
pr_warn("failed to perform CO-RE relocations: %s\n",
7374
errstr(err));
7375
return err;
7376
}
7377
bpf_object__sort_relos(obj);
7378
}
7379
7380
/* Before relocating calls, pre-process relocations and mark
7381
* the few ld_imm64 instructions that point to subprogs.
7382
* Otherwise bpf_object__reloc_code() later would have to consider
7383
* all ld_imm64 insns as relocation candidates. That would
7384
* reduce relocation speed, since the number of find_prog_insn_relo() calls
7385
* would increase and most of them would fail to find a relo.
7386
*/
7387
for (i = 0; i < obj->nr_programs; i++) {
7388
prog = &obj->programs[i];
7389
for (j = 0; j < prog->nr_reloc; j++) {
7390
struct reloc_desc *relo = &prog->reloc_desc[j];
7391
struct bpf_insn *insn = &prog->insns[relo->insn_idx];
7392
7393
/* mark the insn, so it's recognized by insn_is_pseudo_func() */
7394
if (relo->type == RELO_SUBPROG_ADDR)
7395
insn[0].src_reg = BPF_PSEUDO_FUNC;
7396
}
7397
}
7398
7399
/* relocate subprogram calls and append used subprograms to main
7400
* programs; each copy of subprogram code needs to be relocated
7401
* differently for each main program, because its code location might
7402
* have changed.
7403
* Append subprog relos to main programs to allow data relos to be
7404
* processed after text is completely relocated.
7405
*/
7406
for (i = 0; i < obj->nr_programs; i++) {
7407
prog = &obj->programs[i];
7408
/* sub-program's sub-calls are relocated within the context of
7409
* its main program only
7410
*/
7411
if (prog_is_subprog(obj, prog))
7412
continue;
7413
if (!prog->autoload)
7414
continue;
7415
7416
err = bpf_object__relocate_calls(obj, prog);
7417
if (err) {
7418
pr_warn("prog '%s': failed to relocate calls: %s\n",
7419
prog->name, errstr(err));
7420
return err;
7421
}
7422
7423
err = bpf_prog_assign_exc_cb(obj, prog);
7424
if (err)
7425
return err;
7426
/* Now, also append exception callback if it has not been done already. */
7427
if (prog->exception_cb_idx >= 0) {
7428
struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
7429
7430
/* Calling the exception callback directly is disallowed, which the
7431
* verifier will reject later. In case it was processed already,
7432
* we can skip this step, otherwise for all other valid cases we
7433
* have to append exception callback now.
7434
*/
7435
if (subprog->sub_insn_off == 0) {
7436
err = bpf_object__append_subprog_code(obj, prog, subprog);
7437
if (err)
7438
return err;
7439
err = bpf_object__reloc_code(obj, prog, subprog);
7440
if (err)
7441
return err;
7442
}
7443
}
7444
}
7445
for (i = 0; i < obj->nr_programs; i++) {
7446
prog = &obj->programs[i];
7447
if (prog_is_subprog(obj, prog))
7448
continue;
7449
if (!prog->autoload)
7450
continue;
7451
7452
/* Process data relos for main programs */
7453
err = bpf_object__relocate_data(obj, prog);
7454
if (err) {
7455
pr_warn("prog '%s': failed to relocate data references: %s\n",
7456
prog->name, errstr(err));
7457
return err;
7458
}
7459
7460
/* Fix up .BTF.ext information, if necessary */
7461
err = bpf_program_fixup_func_info(obj, prog);
7462
if (err) {
7463
pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n",
7464
prog->name, errstr(err));
7465
return err;
7466
}
7467
}
7468
7469
return 0;
7470
}
7471
7472
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
7473
Elf64_Shdr *shdr, Elf_Data *data);
7474
7475
static int bpf_object__collect_map_relos(struct bpf_object *obj,
7476
Elf64_Shdr *shdr, Elf_Data *data)
7477
{
7478
const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
7479
int i, j, nrels, new_sz;
7480
const struct btf_var_secinfo *vi = NULL;
7481
const struct btf_type *sec, *var, *def;
7482
struct bpf_map *map = NULL, *targ_map = NULL;
7483
struct bpf_program *targ_prog = NULL;
7484
bool is_prog_array, is_map_in_map;
7485
const struct btf_member *member;
7486
const char *name, *mname, *type;
7487
unsigned int moff;
7488
Elf64_Sym *sym;
7489
Elf64_Rel *rel;
7490
void *tmp;
7491
7492
if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
7493
return -EINVAL;
7494
sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
7495
if (!sec)
7496
return -EINVAL;
7497
7498
nrels = shdr->sh_size / shdr->sh_entsize;
7499
for (i = 0; i < nrels; i++) {
7500
rel = elf_rel_by_idx(data, i);
7501
if (!rel) {
7502
pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
7503
return -LIBBPF_ERRNO__FORMAT;
7504
}
7505
7506
sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
7507
if (!sym) {
7508
pr_warn(".maps relo #%d: symbol %zx not found\n",
7509
i, (size_t)ELF64_R_SYM(rel->r_info));
7510
return -LIBBPF_ERRNO__FORMAT;
7511
}
7512
name = elf_sym_str(obj, sym->st_name) ?: "<?>";
7513
7514
pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
7515
i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
7516
(size_t)rel->r_offset, sym->st_name, name);
7517
7518
for (j = 0; j < obj->nr_maps; j++) {
7519
map = &obj->maps[j];
7520
if (map->sec_idx != obj->efile.btf_maps_shndx)
7521
continue;
7522
7523
vi = btf_var_secinfos(sec) + map->btf_var_idx;
7524
if (vi->offset <= rel->r_offset &&
7525
rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
7526
break;
7527
}
7528
if (j == obj->nr_maps) {
7529
pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
7530
i, name, (size_t)rel->r_offset);
7531
return -EINVAL;
7532
}
7533
7534
is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
7535
is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
7536
type = is_map_in_map ? "map" : "prog";
7537
if (is_map_in_map) {
7538
if (sym->st_shndx != obj->efile.btf_maps_shndx) {
7539
pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
7540
i, name);
7541
return -LIBBPF_ERRNO__RELOC;
7542
}
7543
if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
7544
map->def.key_size != sizeof(int)) {
7545
pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
7546
i, map->name, sizeof(int));
7547
return -EINVAL;
7548
}
7549
targ_map = bpf_object__find_map_by_name(obj, name);
7550
if (!targ_map) {
7551
pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
7552
i, name);
7553
return -ESRCH;
7554
}
7555
} else if (is_prog_array) {
7556
targ_prog = bpf_object__find_program_by_name(obj, name);
7557
if (!targ_prog) {
7558
pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
7559
i, name);
7560
return -ESRCH;
7561
}
7562
if (targ_prog->sec_idx != sym->st_shndx ||
7563
targ_prog->sec_insn_off * 8 != sym->st_value ||
7564
prog_is_subprog(obj, targ_prog)) {
7565
pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
7566
i, name);
7567
return -LIBBPF_ERRNO__RELOC;
7568
}
7569
} else {
7570
return -EINVAL;
7571
}
7572
7573
var = btf__type_by_id(obj->btf, vi->type);
7574
def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
7575
if (btf_vlen(def) == 0)
7576
return -EINVAL;
7577
member = btf_members(def) + btf_vlen(def) - 1;
7578
mname = btf__name_by_offset(obj->btf, member->name_off);
7579
if (strcmp(mname, "values"))
7580
return -EINVAL;
7581
7582
moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
7583
if (rel->r_offset - vi->offset < moff)
7584
return -EINVAL;
7585
7586
moff = rel->r_offset - vi->offset - moff;
7587
/* here we use BPF pointer size, which is always 64 bit, as we
7588
* are parsing an ELF that was built for the BPF target
7589
*/
7590
if (moff % bpf_ptr_sz)
7591
return -EINVAL;
7592
moff /= bpf_ptr_sz;
7593
if (moff >= map->init_slots_sz) {
7594
new_sz = moff + 1;
7595
tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
7596
if (!tmp)
7597
return -ENOMEM;
7598
map->init_slots = tmp;
7599
memset(map->init_slots + map->init_slots_sz, 0,
7600
(new_sz - map->init_slots_sz) * host_ptr_sz);
7601
map->init_slots_sz = new_sz;
7602
}
7603
map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
7604
7605
pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
7606
i, map->name, moff, type, name);
7607
}
7608
7609
return 0;
7610
}
7611
7612
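/* Illustrative sketch (not part of libbpf itself): the BPF-side declarations
 * whose ".maps"-section relocations are collected above. Both map-in-map and
 * BPF_MAP_TYPE_PROG_ARRAY declarations initialize the "values" member, and
 * each initializer slot produces one relocation resolved here. Map names are
 * hypothetical example code:
 *
 *	struct inner_map {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, int);
 *	} inner SEC(".maps");
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 4);
 *		__type(key, int);
 *		__array(values, struct inner_map);
 *	} outer SEC(".maps") = {
 *		.values = { [0] = &inner },
 *	};
 */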
static int bpf_object__collect_relos(struct bpf_object *obj)
7613
{
7614
int i, err;
7615
7616
for (i = 0; i < obj->efile.sec_cnt; i++) {
7617
struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
7618
Elf64_Shdr *shdr;
7619
Elf_Data *data;
7620
int idx;
7621
7622
if (sec_desc->sec_type != SEC_RELO)
7623
continue;
7624
7625
shdr = sec_desc->shdr;
7626
data = sec_desc->data;
7627
idx = shdr->sh_info;
7628
7629
if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
7630
pr_warn("internal error at %d\n", __LINE__);
7631
return -LIBBPF_ERRNO__INTERNAL;
7632
}
7633
7634
if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
7635
err = bpf_object__collect_st_ops_relos(obj, shdr, data);
7636
else if (idx == obj->efile.btf_maps_shndx)
7637
err = bpf_object__collect_map_relos(obj, shdr, data);
7638
else
7639
err = bpf_object__collect_prog_relos(obj, shdr, data);
7640
if (err)
7641
return err;
7642
}
7643
7644
bpf_object__sort_relos(obj);
7645
return 0;
7646
}
7647
7648
static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
7649
{
7650
if (BPF_CLASS(insn->code) == BPF_JMP &&
7651
BPF_OP(insn->code) == BPF_CALL &&
7652
BPF_SRC(insn->code) == BPF_K &&
7653
insn->src_reg == 0 &&
7654
insn->dst_reg == 0) {
7655
*func_id = insn->imm;
7656
return true;
7657
}
7658
return false;
7659
}
7660
7661
static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
7662
{
7663
struct bpf_insn *insn = prog->insns;
7664
enum bpf_func_id func_id;
7665
int i;
7666
7667
if (obj->gen_loader)
7668
return 0;
7669
7670
for (i = 0; i < prog->insns_cnt; i++, insn++) {
7671
if (!insn_is_helper_call(insn, &func_id))
7672
continue;
7673
7674
/* on kernels that don't yet support
7675
* bpf_probe_read_{kernel,user}[_str] helpers, fall back
7676
* to bpf_probe_read() which works well for old kernels
7677
*/
7678
switch (func_id) {
7679
case BPF_FUNC_probe_read_kernel:
7680
case BPF_FUNC_probe_read_user:
7681
if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7682
insn->imm = BPF_FUNC_probe_read;
7683
break;
7684
case BPF_FUNC_probe_read_kernel_str:
7685
case BPF_FUNC_probe_read_user_str:
7686
if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7687
insn->imm = BPF_FUNC_probe_read_str;
7688
break;
7689
default:
7690
break;
7691
}
7692
}
7693
return 0;
7694
}
7695
7696
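/* Illustrative sketch (not part of libbpf itself): from the BPF program's
 * point of view the sanitization above is transparent. Code written against
 * the newer helpers keeps working on old kernels because the call insn's imm
 * is rewritten to the legacy helper ID. The variables below are hypothetical:
 *
 *	int x;
 *
 *	// compiled as a call with imm == BPF_FUNC_probe_read_kernel;
 *	// on kernels lacking FEAT_PROBE_READ_KERN, libbpf patches imm to
 *	// BPF_FUNC_probe_read before loading the program
 *	bpf_probe_read_kernel(&x, sizeof(x), src_ptr);
 */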
static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
7697
int *btf_obj_fd, int *btf_type_id);
7698
7699
/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
7700
static int libbpf_prepare_prog_load(struct bpf_program *prog,
7701
struct bpf_prog_load_opts *opts, long cookie)
7702
{
7703
enum sec_def_flags def = cookie;
7704
7705
/* old kernels might not support specifying expected_attach_type */
7706
if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
7707
opts->expected_attach_type = 0;
7708
7709
if (def & SEC_SLEEPABLE)
7710
opts->prog_flags |= BPF_F_SLEEPABLE;
7711
7712
if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
7713
opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
7714
7715
/* special check for usdt to use uprobe_multi link */
7716
if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) {
7717
/* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type
7718
* in prog, while the expected_attach_type passed to the kernel comes from opts, so we
7719
* update both.
7720
*/
7721
prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7722
opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7723
}
7724
7725
if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
7726
int btf_obj_fd = 0, btf_type_id = 0, err;
7727
const char *attach_name;
7728
7729
attach_name = strchr(prog->sec_name, '/');
7730
if (!attach_name) {
7731
/* if BPF program is annotated with just SEC("fentry")
7732
* (or similar) without declaratively specifying
7733
* target, then it is expected that target will be
7734
* specified with bpf_program__set_attach_target() at
7735
* runtime before the BPF object load step. If not, then
7736
* there is nothing to load into the kernel, as the BPF
7737
* verifier won't be able to validate BPF program
7738
* correctness anyway.
7739
*/
7740
pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7741
prog->name);
7742
return -EINVAL;
7743
}
7744
attach_name++; /* skip over / */
7745
7746
err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7747
if (err)
7748
return err;
7749
7750
/* cache resolved BTF FD and BTF type ID in the prog */
7751
prog->attach_btf_obj_fd = btf_obj_fd;
7752
prog->attach_btf_id = btf_type_id;
7753
7754
/* but by now libbpf common logic is not utilizing
7755
* prog->attach_btf_obj_fd/prog->attach_btf_id anymore because
7756
* this callback is called after opts were populated by
7757
* libbpf, so this callback has to update opts explicitly here
7758
*/
7759
opts->attach_btf_obj_fd = btf_obj_fd;
7760
opts->attach_btf_id = btf_type_id;
7761
}
7762
return 0;
7763
}
7764
7765
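/* Illustrative sketch (not part of libbpf itself): the user-space flow for a
 * program annotated with just SEC("fentry"), where the attach target has to
 * be set programmatically before load, as described above. Object, program,
 * and kernel function names are hypothetical example code:
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "handle");
 *
 *	// resolve the BTF ID of the traced kernel function before loading
 *	bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	bpf_object__load(obj);
 */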
static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7766
7767
static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7768
struct bpf_insn *insns, int insns_cnt,
7769
const char *license, __u32 kern_version, int *prog_fd)
7770
{
7771
LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7772
const char *prog_name = NULL;
7773
size_t log_buf_size = 0;
7774
char *log_buf = NULL, *tmp;
7775
bool own_log_buf = true;
7776
__u32 log_level = prog->log_level;
7777
int ret, err;
7778
7779
/* Be more helpful by rejecting programs that can't be validated early
7780
* with a more meaningful and actionable error message.
7781
*/
7782
switch (prog->type) {
7783
case BPF_PROG_TYPE_UNSPEC:
7784
/*
7785
* The program type must be set. Most likely we couldn't find a proper
7786
* section definition at load time, and thus we didn't infer the type.
7787
*/
7788
pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7789
prog->name, prog->sec_name);
7790
return -EINVAL;
7791
case BPF_PROG_TYPE_STRUCT_OPS:
7792
if (prog->attach_btf_id == 0) {
7793
pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n",
7794
prog->name);
7795
return -EINVAL;
7796
}
7797
break;
7798
default:
7799
break;
7800
}
7801
7802
if (!insns || !insns_cnt)
7803
return -EINVAL;
7804
7805
if (kernel_supports(obj, FEAT_PROG_NAME))
7806
prog_name = prog->name;
7807
load_attr.attach_prog_fd = prog->attach_prog_fd;
7808
load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7809
load_attr.attach_btf_id = prog->attach_btf_id;
7810
load_attr.kern_version = kern_version;
7811
load_attr.prog_ifindex = prog->prog_ifindex;
7812
load_attr.expected_attach_type = prog->expected_attach_type;
7813
7814
/* specify func_info/line_info only if kernel supports them */
7815
if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
7816
load_attr.prog_btf_fd = btf__fd(obj->btf);
7817
load_attr.func_info = prog->func_info;
7818
load_attr.func_info_rec_size = prog->func_info_rec_size;
7819
load_attr.func_info_cnt = prog->func_info_cnt;
7820
load_attr.line_info = prog->line_info;
7821
load_attr.line_info_rec_size = prog->line_info_rec_size;
7822
load_attr.line_info_cnt = prog->line_info_cnt;
7823
}
7824
load_attr.log_level = log_level;
7825
load_attr.prog_flags = prog->prog_flags;
7826
load_attr.fd_array = obj->fd_array;
7827
7828
load_attr.token_fd = obj->token_fd;
7829
if (obj->token_fd)
7830
load_attr.prog_flags |= BPF_F_TOKEN_FD;
7831
7832
/* adjust load_attr if sec_def provides custom preload callback */
7833
if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
7834
err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
7835
if (err < 0) {
7836
pr_warn("prog '%s': failed to prepare load attributes: %s\n",
7837
prog->name, errstr(err));
7838
return err;
7839
}
7840
insns = prog->insns;
7841
insns_cnt = prog->insns_cnt;
7842
}
7843
7844
if (obj->gen_loader) {
7845
bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
7846
license, insns, insns_cnt, &load_attr,
7847
prog - obj->programs);
7848
*prog_fd = -1;
7849
return 0;
7850
}
7851
7852
retry_load:
7853
/* if log_level is zero, we don't request logs initially even if
7854
* a custom log_buf is specified; if the program load fails, then we'll
7855
* bump log_level to 1 and use either custom log_buf or we'll allocate
7856
* our own and retry the load to get details on what failed
7857
*/
7858
if (log_level) {
7859
if (prog->log_buf) {
7860
log_buf = prog->log_buf;
7861
log_buf_size = prog->log_size;
7862
own_log_buf = false;
7863
} else if (obj->log_buf) {
7864
log_buf = obj->log_buf;
7865
log_buf_size = obj->log_size;
7866
own_log_buf = false;
7867
} else {
7868
log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
7869
tmp = realloc(log_buf, log_buf_size);
7870
if (!tmp) {
7871
ret = -ENOMEM;
7872
goto out;
7873
}
7874
log_buf = tmp;
7875
log_buf[0] = '\0';
7876
own_log_buf = true;
7877
}
7878
}
7879
7880
load_attr.log_buf = log_buf;
7881
load_attr.log_size = log_buf_size;
7882
load_attr.log_level = log_level;
7883
7884
ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
7885
if (ret >= 0) {
7886
if (log_level && own_log_buf) {
7887
pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7888
prog->name, log_buf);
7889
}
7890
7891
if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
7892
struct bpf_map *map;
7893
int i;
7894
7895
for (i = 0; i < obj->nr_maps; i++) {
7896
map = &prog->obj->maps[i];
7897
if (map->libbpf_type != LIBBPF_MAP_RODATA)
7898
continue;
7899
7900
if (bpf_prog_bind_map(ret, map->fd, NULL)) {
7901
pr_warn("prog '%s': failed to bind map '%s': %s\n",
7902
prog->name, map->real_name, errstr(errno));
7903
/* Don't fail hard if can't bind rodata. */
7904
}
7905
}
7906
}
7907
7908
*prog_fd = ret;
7909
ret = 0;
7910
goto out;
7911
}
7912
7913
if (log_level == 0) {
7914
log_level = 1;
7915
goto retry_load;
7916
}
7917
/* On ENOSPC, increase log buffer size and retry, unless custom
7918
* log_buf is specified.
7919
* Be careful not to overflow u32, though. The kernel's log buf size limit
7920
* isn't part of UAPI, so it can always be bumped to the full 4GB. So don't
7921
* multiply by 2 unless we are sure we'll fit within 32 bits.
7922
* Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
7923
*/
7924
if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
7925
goto retry_load;
7926
7927
ret = -errno;
7928
7929
/* post-process verifier log to improve error descriptions */
7930
fixup_verifier_log(prog, log_buf, log_buf_size);
7931
7932
pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno));
7933
pr_perm_msg(ret);
7934
7935
if (own_log_buf && log_buf && log_buf[0] != '\0') {
7936
pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7937
prog->name, log_buf);
7938
}
7939
7940
out:
7941
if (own_log_buf)
7942
free(log_buf);
7943
return ret;
7944
}
7945
7946
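/* Illustrative sketch (not part of libbpf itself): supplying a custom
 * verifier log buffer so the retry logic above reuses it instead of
 * allocating its own. Buffer size and program name are hypothetical:
 *
 *	static char vlog[1024 * 1024];
 *
 *	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "handle");
 *
 *	bpf_program__set_log_buf(prog, vlog, sizeof(vlog));
 *	bpf_program__set_log_level(prog, 1);
 *	if (bpf_object__load(obj))
 *		fprintf(stderr, "verifier log:\n%s\n", vlog);
 */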
static char *find_prev_line(char *buf, char *cur)
7947
{
7948
char *p;
7949
7950
if (cur == buf) /* end of a log buf */
7951
return NULL;
7952
7953
p = cur - 1;
7954
while (p - 1 >= buf && *(p - 1) != '\n')
7955
p--;
7956
7957
return p;
7958
}
7959
7960
static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
7961
char *orig, size_t orig_sz, const char *patch)
7962
{
7963
/* size of the remaining log content to the right from the to-be-replaced part */
7964
size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
7965
size_t patch_sz = strlen(patch);
7966
7967
if (patch_sz != orig_sz) {
7968
/* If the patch line(s) are longer than the original piece of verifier log,
7969
* shift log contents by (patch_sz - orig_sz) bytes to the right
7970
* starting from after to-be-replaced part of the log.
7971
*
7972
* If the patch line(s) are shorter than the original piece of verifier log,
7973
* shift log contents by (orig_sz - patch_sz) bytes to the left
7974
* starting from after to-be-replaced part of the log
7975
*
7976
* We need to be careful about not overflowing available
7977
* buf_sz capacity. If that's the case, we'll truncate the end
7978
* of the original log, as necessary.
7979
*/
7980
if (patch_sz > orig_sz) {
7981
if (orig + patch_sz >= buf + buf_sz) {
7982
/* patch is big enough to cover remaining space completely */
7983
patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
7984
rem_sz = 0;
7985
} else if (patch_sz - orig_sz > buf_sz - log_sz) {
7986
/* patch causes part of remaining log to be truncated */
7987
rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7988
}
7989
}
7990
/* shift remaining log to the right by calculated amount */
7991
memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7992
}
7993
7994
memcpy(orig, patch, patch_sz);
7995
}
7996
7997
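/* Worked example (illustrative only) of patch_log() semantics: if the log
 * buffer holds "bad line\nok\n" and orig points at the 9-byte "bad line\n"
 * span, replacing it with the longer patch "better line\n" first shifts the
 * trailing "ok\n" right by the size difference (truncating at buf_sz if
 * needed) and then copies the patch over the original span, yielding
 * "better line\nok\n". A shorter patch shifts the tail left instead.
 */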
static void fixup_log_failed_core_relo(struct bpf_program *prog,
7998
char *buf, size_t buf_sz, size_t log_sz,
7999
char *line1, char *line2, char *line3)
8000
{
8001
/* Expected log for failed and not properly guarded CO-RE relocation:
8002
* line1 -> 123: (85) call unknown#195896080
8003
* line2 -> invalid func unknown#195896080
8004
* line3 -> <anything else or end of buffer>
8005
*
8006
* "123" is the index of the instruction that was poisoned. We extract
8007
* instruction index to find corresponding CO-RE relocation and
8008
* replace this part of the log with more relevant information about
8009
* failed CO-RE relocation.
8010
*/
8011
const struct bpf_core_relo *relo;
8012
struct bpf_core_spec spec;
8013
char patch[512], spec_buf[256];
8014
int insn_idx, err, spec_len;
8015
8016
if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
8017
return;
8018
8019
relo = find_relo_core(prog, insn_idx);
8020
if (!relo)
8021
return;
8022
8023
err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
8024
if (err)
8025
return;
8026
8027
spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
8028
snprintf(patch, sizeof(patch),
8029
"%d: <invalid CO-RE relocation>\n"
8030
"failed to resolve CO-RE relocation %s%s\n",
8031
insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
8032
8033
patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
8034
}
8035
8036
static void fixup_log_missing_map_load(struct bpf_program *prog,
8037
char *buf, size_t buf_sz, size_t log_sz,
8038
char *line1, char *line2, char *line3)
8039
{
8040
/* Expected log for failed and not properly guarded map reference:
8041
* line1 -> 123: (85) call unknown#2001000345
8042
* line2 -> invalid func unknown#2001000345
8043
* line3 -> <anything else or end of buffer>
8044
*
8045
* "123" is the index of the instruction that was poisoned.
8046
* "345" in "2001000345" is a map index in obj->maps to fetch map name.
8047
*/
8048
struct bpf_object *obj = prog->obj;
8049
const struct bpf_map *map;
8050
int insn_idx, map_idx;
8051
char patch[128];
8052
8053
if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
8054
return;
8055
8056
map_idx -= POISON_LDIMM64_MAP_BASE;
8057
if (map_idx < 0 || map_idx >= obj->nr_maps)
8058
return;
8059
map = &obj->maps[map_idx];
8060
8061
snprintf(patch, sizeof(patch),
8062
"%d: <invalid BPF map reference>\n"
8063
"BPF map '%s' is referenced but wasn't created\n",
8064
insn_idx, map->name);
8065
8066
patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
8067
}
8068
8069
static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
8070
char *buf, size_t buf_sz, size_t log_sz,
8071
char *line1, char *line2, char *line3)
8072
{
8073
/* Expected log for failed and not properly guarded kfunc call:
8074
* line1 -> 123: (85) call unknown#2002000345
8075
* line2 -> invalid func unknown#2002000345
8076
* line3 -> <anything else or end of buffer>
8077
*
8078
* "123" is the index of the instruction that was poisoned.
8079
* "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
8080
*/
8081
struct bpf_object *obj = prog->obj;
8082
const struct extern_desc *ext;
8083
int insn_idx, ext_idx;
8084
char patch[128];
8085
8086
if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
8087
return;
8088
8089
ext_idx -= POISON_CALL_KFUNC_BASE;
8090
if (ext_idx < 0 || ext_idx >= obj->nr_extern)
8091
return;
8092
ext = &obj->externs[ext_idx];
8093
8094
snprintf(patch, sizeof(patch),
8095
"%d: <invalid kfunc call>\n"
8096
"kfunc '%s' is referenced but wasn't resolved\n",
8097
insn_idx, ext->name);
8098
8099
patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
8100
}
8101
8102
static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
8103
{
8104
/* look for familiar error patterns in the last N lines of the log */
8105
const size_t max_last_line_cnt = 10;
8106
char *prev_line, *cur_line, *next_line;
8107
size_t log_sz;
8108
int i;
8109
8110
if (!buf)
8111
return;
8112
8113
log_sz = strlen(buf) + 1;
8114
next_line = buf + log_sz - 1;
8115
8116
for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
8117
cur_line = find_prev_line(buf, next_line);
8118
if (!cur_line)
8119
return;
8120
8121
if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
8122
prev_line = find_prev_line(buf, cur_line);
8123
if (!prev_line)
8124
continue;
8125
8126
/* failed CO-RE relocation case */
8127
fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
8128
prev_line, cur_line, next_line);
8129
return;
8130
} else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
8131
prev_line = find_prev_line(buf, cur_line);
8132
if (!prev_line)
8133
continue;
8134
8135
/* reference to uncreated BPF map */
8136
fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
8137
prev_line, cur_line, next_line);
8138
return;
8139
} else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
8140
prev_line = find_prev_line(buf, cur_line);
8141
if (!prev_line)
8142
continue;
8143
8144
/* reference to unresolved kfunc */
8145
fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
8146
prev_line, cur_line, next_line);
8147
return;
8148
}
8149
}
8150
}
8151
8152
static int bpf_program_record_relos(struct bpf_program *prog)
8153
{
8154
struct bpf_object *obj = prog->obj;
8155
int i;
8156
8157
for (i = 0; i < prog->nr_reloc; i++) {
8158
struct reloc_desc *relo = &prog->reloc_desc[i];
8159
struct extern_desc *ext = &obj->externs[relo->ext_idx];
8160
int kind;
8161
8162
switch (relo->type) {
8163
case RELO_EXTERN_LD64:
8164
if (ext->type != EXT_KSYM)
8165
continue;
8166
kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
8167
BTF_KIND_VAR : BTF_KIND_FUNC;
8168
bpf_gen__record_extern(obj->gen_loader, ext->name,
8169
ext->is_weak, !ext->ksym.type_id,
8170
true, kind, relo->insn_idx);
8171
break;
8172
case RELO_EXTERN_CALL:
8173
bpf_gen__record_extern(obj->gen_loader, ext->name,
8174
ext->is_weak, false, false, BTF_KIND_FUNC,
8175
relo->insn_idx);
8176
break;
8177
case RELO_CORE: {
8178
struct bpf_core_relo cr = {
8179
.insn_off = relo->insn_idx * 8,
8180
.type_id = relo->core_relo->type_id,
8181
.access_str_off = relo->core_relo->access_str_off,
8182
.kind = relo->core_relo->kind,
8183
};
8184
8185
bpf_gen__record_relo_core(obj->gen_loader, &cr);
8186
break;
8187
}
8188
default:
8189
continue;
8190
}
8191
}
8192
return 0;
8193
}
8194
8195
static int
8196
bpf_object__load_progs(struct bpf_object *obj, int log_level)
8197
{
8198
struct bpf_program *prog;
8199
size_t i;
8200
int err;
8201
8202
for (i = 0; i < obj->nr_programs; i++) {
8203
prog = &obj->programs[i];
8204
if (prog_is_subprog(obj, prog))
8205
continue;
8206
if (!prog->autoload) {
8207
pr_debug("prog '%s': skipped loading\n", prog->name);
8208
continue;
8209
}
8210
prog->log_level |= log_level;
8211
8212
if (obj->gen_loader)
8213
bpf_program_record_relos(prog);
8214
8215
err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
8216
obj->license, obj->kern_version, &prog->fd);
8217
if (err) {
8218
pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err));
8219
return err;
8220
}
8221
}
8222
8223
bpf_object__free_relocs(obj);
8224
return 0;
8225
}
8226
8227
static int bpf_object_prepare_progs(struct bpf_object *obj)
8228
{
8229
struct bpf_program *prog;
8230
size_t i;
8231
int err;
8232
8233
for (i = 0; i < obj->nr_programs; i++) {
8234
prog = &obj->programs[i];
8235
err = bpf_object__sanitize_prog(obj, prog);
8236
if (err)
8237
return err;
8238
}
8239
return 0;
8240
}
8241
8242
static const struct bpf_sec_def *find_sec_def(const char *sec_name);
8243
8244
static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
8245
{
8246
struct bpf_program *prog;
8247
int err;
8248
8249
bpf_object__for_each_program(prog, obj) {
8250
prog->sec_def = find_sec_def(prog->sec_name);
8251
if (!prog->sec_def) {
8252
/* couldn't guess, but user might manually specify */
8253
pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
8254
prog->name, prog->sec_name);
8255
continue;
8256
}
8257
8258
prog->type = prog->sec_def->prog_type;
8259
prog->expected_attach_type = prog->sec_def->expected_attach_type;
8260
8261
/* sec_def can have custom callback which should be called
8262
* after bpf_program is initialized to adjust its properties
8263
*/
8264
if (prog->sec_def->prog_setup_fn) {
8265
err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
8266
if (err < 0) {
8267
pr_warn("prog '%s': failed to initialize: %s\n",
8268
prog->name, errstr(err));
8269
return err;
8270
}
8271
}
8272
}
8273
8274
return 0;
8275
}
8276
8277
static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
8278
const char *obj_name,
8279
const struct bpf_object_open_opts *opts)
8280
{
8281
const char *kconfig, *btf_tmp_path, *token_path;
8282
struct bpf_object *obj;
8283
int err;
8284
char *log_buf;
8285
size_t log_size;
8286
__u32 log_level;
8287
8288
if (obj_buf && !obj_name)
8289
return ERR_PTR(-EINVAL);
8290
8291
if (elf_version(EV_CURRENT) == EV_NONE) {
8292
pr_warn("failed to init libelf for %s\n",
8293
path ? : "(mem buf)");
8294
return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
8295
}
8296
8297
if (!OPTS_VALID(opts, bpf_object_open_opts))
8298
return ERR_PTR(-EINVAL);
8299
8300
obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name;
8301
if (obj_buf) {
8302
path = obj_name;
8303
pr_debug("loading object '%s' from buffer\n", obj_name);
8304
} else {
8305
pr_debug("loading object from %s\n", path);
8306
}
8307
8308
log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
8309
log_size = OPTS_GET(opts, kernel_log_size, 0);
8310
log_level = OPTS_GET(opts, kernel_log_level, 0);
8311
if (log_size > UINT_MAX)
8312
return ERR_PTR(-EINVAL);
8313
if (log_size && !log_buf)
8314
return ERR_PTR(-EINVAL);
8315
8316
token_path = OPTS_GET(opts, bpf_token_path, NULL);
8317
/* if user didn't specify bpf_token_path explicitly, check if
8318
* LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
8319
* option
8320
*/
8321
if (!token_path)
8322
token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
8323
if (token_path && strlen(token_path) >= PATH_MAX)
8324
return ERR_PTR(-ENAMETOOLONG);
8325
8326
obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
8327
if (IS_ERR(obj))
8328
return obj;
8329
8330
obj->log_buf = log_buf;
8331
obj->log_size = log_size;
8332
obj->log_level = log_level;
8333
8334
if (token_path) {
8335
obj->token_path = strdup(token_path);
8336
if (!obj->token_path) {
8337
err = -ENOMEM;
8338
goto out;
8339
}
8340
}
8341
8342
btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
8343
if (btf_tmp_path) {
8344
if (strlen(btf_tmp_path) >= PATH_MAX) {
8345
err = -ENAMETOOLONG;
8346
goto out;
8347
}
8348
obj->btf_custom_path = strdup(btf_tmp_path);
8349
if (!obj->btf_custom_path) {
8350
err = -ENOMEM;
8351
goto out;
8352
}
8353
}
8354
8355
kconfig = OPTS_GET(opts, kconfig, NULL);
8356
if (kconfig) {
8357
obj->kconfig = strdup(kconfig);
8358
if (!obj->kconfig) {
8359
err = -ENOMEM;
8360
goto out;
8361
}
8362
}
8363
8364
err = bpf_object__elf_init(obj);
8365
err = err ? : bpf_object__elf_collect(obj);
8366
err = err ? : bpf_object__collect_externs(obj);
8367
err = err ? : bpf_object_fixup_btf(obj);
8368
err = err ? : bpf_object__init_maps(obj, opts);
8369
err = err ? : bpf_object_init_progs(obj, opts);
8370
err = err ? : bpf_object__collect_relos(obj);
8371
if (err)
8372
goto out;
8373
8374
bpf_object__elf_finish(obj);
8375
8376
return obj;
8377
out:
8378
bpf_object__close(obj);
8379
return ERR_PTR(err);
8380
}
8381
8382
struct bpf_object *
8383
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
8384
{
8385
if (!path)
8386
return libbpf_err_ptr(-EINVAL);
8387
8388
return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts));
8389
}
8390
8391
struct bpf_object *bpf_object__open(const char *path)
8392
{
8393
return bpf_object__open_file(path, NULL);
8394
}
8395
8396
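/* Illustrative sketch (not part of libbpf itself): a typical open flow using
 * the APIs above, optionally passing open-time options. The file path, option
 * values, and error handling are hypothetical example code:
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.kernel_log_level = 1,
 *	);
 *	struct bpf_object *obj;
 *
 *	obj = bpf_object__open_file("prog.bpf.o", &opts);
 *	if (!obj)
 *		return -errno;	// libbpf sets errno on open failure
 *	// ... load, attach, use ...
 *	bpf_object__close(obj);
 */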
struct bpf_object *
8397
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
8398
const struct bpf_object_open_opts *opts)
8399
{
8400
char tmp_name[64];
8401
8402
if (!obj_buf || obj_buf_sz == 0)
8403
return libbpf_err_ptr(-EINVAL);
8404
8405
/* create a (quite useless) default "name" for this memory buffer object */
8406
snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz);
8407
8408
return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts));
8409
}
8410
8411
static int bpf_object_unload(struct bpf_object *obj)
8412
{
8413
size_t i;
8414
8415
if (!obj)
8416
return libbpf_err(-EINVAL);
8417
8418
for (i = 0; i < obj->nr_maps; i++) {
8419
zclose(obj->maps[i].fd);
8420
if (obj->maps[i].st_ops)
8421
zfree(&obj->maps[i].st_ops->kern_vdata);
8422
}
8423
8424
for (i = 0; i < obj->nr_programs; i++)
8425
bpf_program__unload(&obj->programs[i]);
8426
8427
return 0;
8428
}
8429
8430
static int bpf_object__sanitize_maps(struct bpf_object *obj)
8431
{
8432
struct bpf_map *m;
8433
8434
bpf_object__for_each_map(m, obj) {
8435
if (!bpf_map__is_internal(m))
8436
continue;
8437
if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
8438
m->def.map_flags &= ~BPF_F_MMAPABLE;
8439
}
8440
8441
return 0;
8442
}
8443
8444
typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
8445
const char *sym_name, void *ctx);
8446
8447
static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
8448
{
8449
char sym_type, sym_name[500];
8450
unsigned long long sym_addr;
8451
int ret, err = 0;
8452
FILE *f;
8453
8454
f = fopen("/proc/kallsyms", "re");
8455
if (!f) {
8456
err = -errno;
8457
pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err));
8458
return err;
8459
}
8460
8461
while (true) {
8462
ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
8463
&sym_addr, &sym_type, sym_name);
8464
if (ret == EOF && feof(f))
8465
break;
8466
if (ret != 3) {
8467
pr_warn("failed to read kallsyms entry: %d\n", ret);
8468
err = -EINVAL;
8469
break;
8470
}
8471
8472
err = cb(sym_addr, sym_type, sym_name, ctx);
8473
if (err)
8474
break;
8475
}
8476
8477
fclose(f);
8478
return err;
8479
}
8480
8481
static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
8482
const char *sym_name, void *ctx)
8483
{
8484
struct bpf_object *obj = ctx;
8485
const struct btf_type *t;
8486
struct extern_desc *ext;
8487
const char *res;
8488
8489
res = strstr(sym_name, ".llvm.");
8490
if (sym_type == 'd' && res)
8491
ext = find_extern_by_name_with_len(obj, sym_name, res - sym_name);
8492
else
8493
ext = find_extern_by_name(obj, sym_name);
8494
if (!ext || ext->type != EXT_KSYM)
8495
return 0;
8496
8497
t = btf__type_by_id(obj->btf, ext->btf_id);
8498
if (!btf_is_var(t))
8499
return 0;
8500
8501
if (ext->is_set && ext->ksym.addr != sym_addr) {
8502
pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
8503
sym_name, ext->ksym.addr, sym_addr);
8504
return -EINVAL;
8505
}
8506
if (!ext->is_set) {
8507
ext->is_set = true;
8508
ext->ksym.addr = sym_addr;
8509
pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
8510
}
8511
return 0;
8512
}
8513
8514
static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
8515
{
8516
return libbpf_kallsyms_parse(kallsyms_cb, obj);
8517
}
8518
8519
static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
8520
__u16 kind, struct btf **res_btf,
8521
struct module_btf **res_mod_btf)
8522
{
8523
struct module_btf *mod_btf;
8524
struct btf *btf;
8525
int i, id, err;
8526
8527
btf = obj->btf_vmlinux;
8528
mod_btf = NULL;
8529
id = btf__find_by_name_kind(btf, ksym_name, kind);
8530
8531
if (id == -ENOENT) {
8532
err = load_module_btfs(obj);
8533
if (err)
8534
return err;
8535
8536
for (i = 0; i < obj->btf_module_cnt; i++) {
8537
/* we assume module_btf's BTF FD is always >0 */
8538
mod_btf = &obj->btf_modules[i];
8539
btf = mod_btf->btf;
8540
id = btf__find_by_name_kind_own(btf, ksym_name, kind);
8541
if (id != -ENOENT)
8542
break;
8543
}
8544
}
8545
if (id <= 0)
8546
return -ESRCH;
8547
8548
*res_btf = btf;
8549
*res_mod_btf = mod_btf;
8550
return id;
8551
}
8552
8553
static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
8554
struct extern_desc *ext)
8555
{
8556
const struct btf_type *targ_var, *targ_type;
8557
__u32 targ_type_id, local_type_id;
8558
struct module_btf *mod_btf = NULL;
8559
const char *targ_var_name;
8560
struct btf *btf = NULL;
8561
int id, err;
8562
8563
id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
8564
if (id < 0) {
8565
if (id == -ESRCH && ext->is_weak)
8566
return 0;
8567
pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
8568
ext->name);
8569
return id;
8570
}
8571
8572
/* find local type_id */
8573
local_type_id = ext->ksym.type_id;
8574
8575
/* find target type_id */
8576
targ_var = btf__type_by_id(btf, id);
8577
targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
8578
targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
8579
8580
err = bpf_core_types_are_compat(obj->btf, local_type_id,
8581
btf, targ_type_id);
8582
if (err <= 0) {
8583
const struct btf_type *local_type;
8584
const char *targ_name, *local_name;
8585
8586
local_type = btf__type_by_id(obj->btf, local_type_id);
8587
local_name = btf__name_by_offset(obj->btf, local_type->name_off);
8588
targ_name = btf__name_by_offset(btf, targ_type->name_off);
8589
8590
pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
8591
ext->name, local_type_id,
8592
btf_kind_str(local_type), local_name, targ_type_id,
8593
btf_kind_str(targ_type), targ_name);
8594
return -EINVAL;
8595
}
8596
8597
ext->is_set = true;
8598
ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8599
ext->ksym.kernel_btf_id = id;
8600
pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
8601
ext->name, id, btf_kind_str(targ_var), targ_var_name);
8602
8603
return 0;
8604
}
8605
8606
static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
8607
struct extern_desc *ext)
8608
{
8609
int local_func_proto_id, kfunc_proto_id, kfunc_id;
8610
struct module_btf *mod_btf = NULL;
8611
const struct btf_type *kern_func;
8612
struct btf *kern_btf = NULL;
8613
int ret;
8614
8615
local_func_proto_id = ext->ksym.type_id;
8616
8617
kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
8618
&mod_btf);
8619
if (kfunc_id < 0) {
8620
if (kfunc_id == -ESRCH && ext->is_weak)
8621
return 0;
8622
pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
8623
ext->name);
8624
return kfunc_id;
8625
}
8626
8627
kern_func = btf__type_by_id(kern_btf, kfunc_id);
8628
kfunc_proto_id = kern_func->type;
8629
8630
ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
8631
kern_btf, kfunc_proto_id);
8632
if (ret <= 0) {
8633
if (ext->is_weak)
8634
return 0;
8635
8636
pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
8637
ext->name, local_func_proto_id,
8638
mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
8639
return -EINVAL;
8640
}
8641
8642
/* set index for module BTF fd in fd_array, if unset */
8643
if (mod_btf && !mod_btf->fd_array_idx) {
8644
/* insn->off is s16 */
8645
if (obj->fd_array_cnt == INT16_MAX) {
8646
pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
8647
ext->name, mod_btf->fd_array_idx);
8648
return -E2BIG;
8649
}
8650
/* Cannot use index 0 for module BTF fd */
8651
if (!obj->fd_array_cnt)
8652
obj->fd_array_cnt = 1;
8653
8654
ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
8655
obj->fd_array_cnt + 1);
8656
if (ret)
8657
return ret;
8658
mod_btf->fd_array_idx = obj->fd_array_cnt;
8659
/* we assume module BTF FD is always >0 */
8660
obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
8661
}
8662
8663
ext->is_set = true;
8664
ext->ksym.kernel_btf_id = kfunc_id;
8665
ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
8666
/* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
8667
* populates FD into ld_imm64 insn when it's used to point to kfunc.
8668
* {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
8669
* {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
8670
*/
8671
ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8672
pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
8673
ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
8674
8675
return 0;
8676
}
8677
8678
static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
8679
{
8680
const struct btf_type *t;
8681
struct extern_desc *ext;
8682
int i, err;
8683
8684
for (i = 0; i < obj->nr_extern; i++) {
8685
ext = &obj->externs[i];
8686
if (ext->type != EXT_KSYM || !ext->ksym.type_id)
8687
continue;
8688
8689
if (obj->gen_loader) {
8690
ext->is_set = true;
8691
ext->ksym.kernel_btf_obj_fd = 0;
8692
ext->ksym.kernel_btf_id = 0;
8693
continue;
8694
}
8695
t = btf__type_by_id(obj->btf, ext->btf_id);
8696
if (btf_is_var(t))
8697
err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
8698
else
8699
err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8700
if (err)
8701
return err;
8702
}
8703
return 0;
8704
}
8705
8706
static int bpf_object__resolve_externs(struct bpf_object *obj,
8707
const char *extra_kconfig)
8708
{
8709
bool need_config = false, need_kallsyms = false;
8710
bool need_vmlinux_btf = false;
8711
struct extern_desc *ext;
8712
void *kcfg_data = NULL;
8713
int err, i;
8714
8715
if (obj->nr_extern == 0)
8716
return 0;
8717
8718
if (obj->kconfig_map_idx >= 0)
8719
kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8720
8721
for (i = 0; i < obj->nr_extern; i++) {
8722
ext = &obj->externs[i];
8723
8724
if (ext->type == EXT_KSYM) {
8725
if (ext->ksym.type_id)
8726
need_vmlinux_btf = true;
8727
else
8728
need_kallsyms = true;
8729
continue;
8730
} else if (ext->type == EXT_KCFG) {
8731
void *ext_ptr = kcfg_data + ext->kcfg.data_off;
8732
__u64 value = 0;
8733
8734
/* Kconfig externs need actual /proc/config.gz */
8735
if (str_has_pfx(ext->name, "CONFIG_")) {
8736
need_config = true;
8737
continue;
8738
}
8739
8740
/* Virtual kcfg externs are handled specially by libbpf */
8741
if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8742
value = get_kernel_version();
8743
if (!value) {
8744
pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
8745
return -EINVAL;
8746
}
8747
} else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
8748
value = kernel_supports(obj, FEAT_BPF_COOKIE);
8749
} else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
8750
value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
8751
} else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
8752
/* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
8753
* __kconfig externs, where LINUX_ ones are virtual and filled out
8754
* by libbpf itself (their values don't come from Kconfig).
8755
* If LINUX_xxx variable is not recognized by libbpf, but is marked
8756
* __weak, it defaults to zero value, just like for CONFIG_xxx
8757
* externs.
8758
*/
8759
pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
8760
return -EINVAL;
8761
}
8762
8763
err = set_kcfg_value_num(ext, ext_ptr, value);
8764
if (err)
8765
return err;
8766
pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
8767
ext->name, (long long)value);
8768
} else {
8769
pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
8770
return -EINVAL;
8771
}
8772
}
8773
if (need_config && extra_kconfig) {
8774
err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8775
if (err)
8776
return -EINVAL;
8777
need_config = false;
8778
for (i = 0; i < obj->nr_extern; i++) {
8779
ext = &obj->externs[i];
8780
if (ext->type == EXT_KCFG && !ext->is_set) {
8781
need_config = true;
8782
break;
8783
}
8784
}
8785
}
8786
if (need_config) {
8787
err = bpf_object__read_kconfig_file(obj, kcfg_data);
8788
if (err)
8789
return -EINVAL;
8790
}
8791
if (need_kallsyms) {
8792
err = bpf_object__read_kallsyms_file(obj);
8793
if (err)
8794
return -EINVAL;
8795
}
8796
if (need_vmlinux_btf) {
8797
err = bpf_object__resolve_ksyms_btf_id(obj);
8798
if (err)
8799
return -EINVAL;
8800
}
8801
for (i = 0; i < obj->nr_extern; i++) {
8802
ext = &obj->externs[i];
8803
8804
if (!ext->is_set && !ext->is_weak) {
8805
pr_warn("extern '%s' (strong): not resolved\n", ext->name);
8806
return -ESRCH;
8807
} else if (!ext->is_set) {
8808
pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
8809
ext->name);
8810
}
8811
}
8812
8813
return 0;
8814
}
8815
8816
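/* Illustrative sketch (not part of libbpf itself): the BPF-side __kconfig
 * extern declarations that the resolution logic above fills in. CONFIG_*
 * values come from /proc/config.gz (or the extra kconfig string), while
 * LINUX_* ones are virtual and computed by libbpf. Exact types are a
 * hypothetical example:
 *
 *	extern int LINUX_KERNEL_VERSION __kconfig;
 *	extern int CONFIG_HZ __kconfig;
 *	extern bool CONFIG_BPF_SYSCALL __kconfig __weak;
 */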
static void bpf_map_prepare_vdata(const struct bpf_map *map)
8817
{
8818
const struct btf_type *type;
8819
struct bpf_struct_ops *st_ops;
8820
__u32 i;
8821
8822
st_ops = map->st_ops;
8823
type = btf__type_by_id(map->obj->btf, st_ops->type_id);
8824
for (i = 0; i < btf_vlen(type); i++) {
8825
struct bpf_program *prog = st_ops->progs[i];
8826
void *kern_data;
8827
int prog_fd;
8828
8829
if (!prog)
8830
continue;
8831
8832
prog_fd = bpf_program__fd(prog);
8833
kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
8834
*(unsigned long *)kern_data = prog_fd;
8835
}
8836
}
8837
8838
static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
8839
{
8840
struct bpf_map *map;
8841
int i;
8842
8843
for (i = 0; i < obj->nr_maps; i++) {
8844
map = &obj->maps[i];
8845
8846
if (!bpf_map__is_struct_ops(map))
8847
continue;
8848
8849
if (!map->autocreate)
8850
continue;
8851
8852
bpf_map_prepare_vdata(map);
8853
}
8854
8855
return 0;
8856
}
8857
8858
static void bpf_object_unpin(struct bpf_object *obj)
8859
{
8860
int i;
8861
8862
/* unpin any maps that were auto-pinned during load */
8863
for (i = 0; i < obj->nr_maps; i++)
8864
if (obj->maps[i].pinned && !obj->maps[i].reused)
8865
bpf_map__unpin(&obj->maps[i], NULL);
8866
}
8867
8868
static void bpf_object_post_load_cleanup(struct bpf_object *obj)
8869
{
8870
int i;
8871
8872
/* clean up fd_array */
8873
zfree(&obj->fd_array);
8874
8875
/* clean up module BTFs */
8876
for (i = 0; i < obj->btf_module_cnt; i++) {
8877
close(obj->btf_modules[i].fd);
8878
btf__free(obj->btf_modules[i].btf);
8879
free(obj->btf_modules[i].name);
8880
}
8881
obj->btf_module_cnt = 0;
8882
zfree(&obj->btf_modules);
8883
8884
/* clean up vmlinux BTF */
8885
btf__free(obj->btf_vmlinux);
8886
obj->btf_vmlinux = NULL;
8887
}
8888
8889
static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path)
8890
{
8891
int err;
8892
8893
if (obj->state >= OBJ_PREPARED) {
8894
pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name);
8895
return -EINVAL;
8896
}
8897
8898
err = bpf_object_prepare_token(obj);
8899
err = err ? : bpf_object__probe_loading(obj);
8900
err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8901
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8902
err = err ? : bpf_object__sanitize_maps(obj);
8903
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8904
err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
8905
err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
8906
err = err ? : bpf_object__sanitize_and_load_btf(obj);
8907
err = err ? : bpf_object__create_maps(obj);
8908
err = err ? : bpf_object_prepare_progs(obj);
8909
8910
if (err) {
8911
bpf_object_unpin(obj);
8912
bpf_object_unload(obj);
8913
obj->state = OBJ_LOADED;
8914
return err;
8915
}
8916
8917
obj->state = OBJ_PREPARED;
8918
return 0;
8919
}
8920
8921
static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
8922
{
8923
int err;
8924
8925
if (!obj)
8926
return libbpf_err(-EINVAL);
8927
8928
if (obj->state >= OBJ_LOADED) {
8929
pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8930
return libbpf_err(-EINVAL);
8931
}
8932
8933
/* Disallow the kernel from loading programs of non-native endianness, but
8934
* permit cross-endian creation of "light skeleton".
8935
*/
8936
if (obj->gen_loader) {
8937
bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
8938
} else if (!is_native_endianness(obj)) {
8939
pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name);
8940
return libbpf_err(-LIBBPF_ERRNO__ENDIAN);
8941
}
8942
8943
if (obj->state < OBJ_PREPARED) {
8944
err = bpf_object_prepare(obj, target_btf_path);
8945
if (err)
8946
return libbpf_err(err);
8947
}
8948
err = bpf_object__load_progs(obj, extra_log_level);
8949
err = err ? : bpf_object_init_prog_arrays(obj);
8950
err = err ? : bpf_object_prepare_struct_ops(obj);
8951
8952
if (obj->gen_loader) {
8953
/* reset FDs */
8954
if (obj->btf)
8955
btf__set_fd(obj->btf, -1);
8956
if (!err)
8957
err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
8958
}
8959
8960
bpf_object_post_load_cleanup(obj);
8961
obj->state = OBJ_LOADED; /* whether the load succeeded or not */
8962
8963
if (err) {
8964
bpf_object_unpin(obj);
8965
bpf_object_unload(obj);
8966
pr_warn("failed to load object '%s'\n", obj->path);
8967
return libbpf_err(err);
8968
}
8969
8970
return 0;
8971
}
8972
8973
int bpf_object__prepare(struct bpf_object *obj)
8974
{
8975
return libbpf_err(bpf_object_prepare(obj, NULL));
8976
}
8977
8978
int bpf_object__load(struct bpf_object *obj)
8979
{
8980
return bpf_object_load(obj, 0, NULL);
8981
}
8982
8983
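/* Illustrative sketch (not part of libbpf itself): splitting object
 * preparation from the final load, as the two entry points above allow.
 * Object and file names are hypothetical example code:
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *
 *	// create maps and perform relocations, but don't load programs yet
 *	if (bpf_object__prepare(obj))
 *		goto err;
 *	// ... adjust programs/maps that don't require re-relocation ...
 *	if (bpf_object__load(obj))
 *		goto err;
 */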
static int make_parent_dir(const char *path)
8984
{
8985
char *dname, *dir;
8986
int err = 0;
8987
8988
dname = strdup(path);
8989
if (dname == NULL)
8990
return -ENOMEM;
8991
8992
dir = dirname(dname);
8993
if (mkdir(dir, 0700) && errno != EEXIST)
8994
err = -errno;
8995
8996
free(dname);
8997
if (err) {
8998
pr_warn("failed to mkdir %s: %s\n", path, errstr(err));
8999
}
9000
return err;
9001
}
9002
9003
static int check_path(const char *path)
9004
{
9005
struct statfs st_fs;
9006
char *dname, *dir;
9007
int err = 0;
9008
9009
if (path == NULL)
9010
return -EINVAL;
9011
9012
dname = strdup(path);
9013
if (dname == NULL)
9014
return -ENOMEM;
9015
9016
dir = dirname(dname);
9017
if (statfs(dir, &st_fs)) {
9018
pr_warn("failed to statfs %s: %s\n", dir, errstr(errno));
9019
err = -errno;
9020
}
9021
free(dname);
9022
9023
if (!err && st_fs.f_type != BPF_FS_MAGIC) {
9024
pr_warn("specified path %s is not on BPF FS\n", path);
9025
err = -EINVAL;
9026
}
9027
9028
return err;
9029
}
9030
9031
int bpf_program__pin(struct bpf_program *prog, const char *path)
9032
{
9033
int err;
9034
9035
if (prog->fd < 0) {
9036
pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
9037
return libbpf_err(-EINVAL);
9038
}
9039
9040
err = make_parent_dir(path);
9041
if (err)
9042
return libbpf_err(err);
9043
9044
err = check_path(path);
9045
if (err)
9046
return libbpf_err(err);
9047
9048
if (bpf_obj_pin(prog->fd, path)) {
9049
err = -errno;
9050
pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err));
9051
return libbpf_err(err);
9052
}
9053
9054
pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
9055
return 0;
9056
}
9057
9058
int bpf_program__unpin(struct bpf_program *prog, const char *path)
9059
{
9060
int err;
9061
9062
if (prog->fd < 0) {
9063
pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
9064
return libbpf_err(-EINVAL);
9065
}
9066
9067
err = check_path(path);
9068
if (err)
9069
return libbpf_err(err);
9070
9071
err = unlink(path);
9072
if (err)
9073
return libbpf_err(-errno);
9074
9075
pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
9076
return 0;
9077
}
9078
9079
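/* Illustrative sketch (not part of libbpf itself): pinning a loaded program
 * to a BPF FS path so it outlives the loading process, then removing the pin.
 * Program name and pin path are hypothetical example code:
 *
 *	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "handle");
 *
 *	if (bpf_program__pin(prog, "/sys/fs/bpf/handle"))
 *		return -1;
 *	// ... later ...
 *	bpf_program__unpin(prog, "/sys/fs/bpf/handle");
 */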
int bpf_map__pin(struct bpf_map *map, const char *path)
9080
{
9081
int err;
9082
9083
if (map == NULL) {
9084
pr_warn("invalid map pointer\n");
9085
return libbpf_err(-EINVAL);
9086
}
9087
9088
if (map->fd < 0) {
9089
pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name);
9090
return libbpf_err(-EINVAL);
9091
}
9092
9093
if (map->pin_path) {
9094
if (path && strcmp(path, map->pin_path)) {
9095
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
9096
bpf_map__name(map), map->pin_path, path);
9097
return libbpf_err(-EINVAL);
9098
} else if (map->pinned) {
9099
pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
9100
bpf_map__name(map), map->pin_path);
9101
return 0;
9102
}
9103
} else {
9104
if (!path) {
9105
pr_warn("missing a path to pin map '%s' at\n",
9106
bpf_map__name(map));
9107
return libbpf_err(-EINVAL);
9108
} else if (map->pinned) {
9109
pr_warn("map '%s' already pinned\n", bpf_map__name(map));
9110
return libbpf_err(-EEXIST);
9111
}
9112
9113
map->pin_path = strdup(path);
9114
if (!map->pin_path) {
9115
err = -errno;
9116
goto out_err;
9117
}
9118
}
9119
9120
err = make_parent_dir(map->pin_path);
9121
if (err)
9122
return libbpf_err(err);
9123
9124
err = check_path(map->pin_path);
9125
if (err)
9126
return libbpf_err(err);
9127
9128
if (bpf_obj_pin(map->fd, map->pin_path)) {
9129
err = -errno;
9130
goto out_err;
9131
}
9132
9133
map->pinned = true;
9134
pr_debug("pinned map '%s'\n", map->pin_path);
9135
9136
return 0;
9137
9138
out_err:
9139
pr_warn("failed to pin map: %s\n", errstr(err));
9140
return libbpf_err(err);
9141
}
9142
9143
int bpf_map__unpin(struct bpf_map *map, const char *path)
9144
{
9145
int err;
9146
9147
if (map == NULL) {
9148
pr_warn("invalid map pointer\n");
9149
return libbpf_err(-EINVAL);
9150
}
9151
9152
if (map->pin_path) {
9153
if (path && strcmp(path, map->pin_path)) {
9154
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
9155
bpf_map__name(map), map->pin_path, path);
9156
return libbpf_err(-EINVAL);
9157
}
9158
path = map->pin_path;
9159
} else if (!path) {
9160
pr_warn("no path to unpin map '%s' from\n",
9161
bpf_map__name(map));
9162
return libbpf_err(-EINVAL);
9163
}
9164
9165
err = check_path(path);
9166
if (err)
9167
return libbpf_err(err);
9168
9169
err = unlink(path);
9170
if (err != 0)
9171
return libbpf_err(-errno);
9172
9173
map->pinned = false;
9174
pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
9175
9176
return 0;
9177
}
9178
9179
int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
9180
{
9181
char *new = NULL;
9182
9183
if (path) {
9184
new = strdup(path);
9185
if (!new)
9186
return libbpf_err(-errno);
9187
}
9188
9189
free(map->pin_path);
9190
map->pin_path = new;
9191
return 0;
9192
}
9193
9194
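/* Illustrative sketch (not part of libbpf itself): setting a map's pin path
 * before load so that bpf_object__load() auto-pins the map (or reuses an
 * already pinned map) at that location. Map name and path are hypothetical:
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");
 *
 *	bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
 *	bpf_object__load(obj);
 */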
__alias(bpf_map__pin_path)
9195
const char *bpf_map__get_pin_path(const struct bpf_map *map);
9196
9197
const char *bpf_map__pin_path(const struct bpf_map *map)
9198
{
9199
return map->pin_path;
9200
}
9201
9202
bool bpf_map__is_pinned(const struct bpf_map *map)
9203
{
9204
return map->pinned;
9205
}
9206
9207
static void sanitize_pin_path(char *s)
9208
{
9209
/* bpffs disallows periods in path names */
9210
while (*s) {
9211
if (*s == '.')
9212
*s = '_';
9213
s++;
9214
}
9215
}
9216
9217
int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
9218
{
9219
struct bpf_map *map;
9220
int err;
9221
9222
if (!obj)
9223
return libbpf_err(-ENOENT);
9224
9225
if (obj->state < OBJ_PREPARED) {
9226
pr_warn("object not yet loaded; load it first\n");
9227
return libbpf_err(-ENOENT);
9228
}
9229
9230
bpf_object__for_each_map(map, obj) {
9231
char *pin_path = NULL;
9232
char buf[PATH_MAX];
9233
9234
if (!map->autocreate)
9235
continue;
9236
9237
if (path) {
9238
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
9239
if (err)
9240
goto err_unpin_maps;
9241
sanitize_pin_path(buf);
9242
pin_path = buf;
9243
} else if (!map->pin_path) {
9244
continue;
9245
}
9246
9247
err = bpf_map__pin(map, pin_path);
9248
if (err)
9249
goto err_unpin_maps;
9250
}
9251
9252
return 0;
9253
9254
err_unpin_maps:
9255
while ((map = bpf_object__prev_map(obj, map))) {
9256
if (!map->pin_path)
9257
continue;
9258
9259
bpf_map__unpin(map, NULL);
9260
}
9261
9262
return libbpf_err(err);
9263
}
9264
9265
int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
9266
{
9267
struct bpf_map *map;
9268
int err;
9269
9270
if (!obj)
9271
return libbpf_err(-ENOENT);
9272
9273
bpf_object__for_each_map(map, obj) {
9274
char *pin_path = NULL;
9275
char buf[PATH_MAX];
9276
9277
if (path) {
9278
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
9279
if (err)
9280
return libbpf_err(err);
9281
sanitize_pin_path(buf);
9282
pin_path = buf;
9283
} else if (!map->pin_path) {
9284
continue;
9285
}
9286
9287
err = bpf_map__unpin(map, pin_path);
9288
if (err)
9289
return libbpf_err(err);
9290
}
9291
9292
return 0;
9293
}
9294
9295
int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
9296
{
9297
struct bpf_program *prog;
9298
char buf[PATH_MAX];
9299
int err;
9300
9301
if (!obj)
9302
return libbpf_err(-ENOENT);
9303
9304
if (obj->state < OBJ_LOADED) {
9305
pr_warn("object not yet loaded; load it first\n");
9306
return libbpf_err(-ENOENT);
9307
}
9308
9309
bpf_object__for_each_program(prog, obj) {
9310
err = pathname_concat(buf, sizeof(buf), path, prog->name);
9311
if (err)
9312
goto err_unpin_programs;
9313
9314
err = bpf_program__pin(prog, buf);
9315
if (err)
9316
goto err_unpin_programs;
9317
}
9318
9319
return 0;
9320
9321
err_unpin_programs:
9322
while ((prog = bpf_object__prev_program(obj, prog))) {
9323
if (pathname_concat(buf, sizeof(buf), path, prog->name))
9324
continue;
9325
9326
bpf_program__unpin(prog, buf);
9327
}
9328
9329
return libbpf_err(err);
9330
}
9331
9332
int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
9333
{
9334
struct bpf_program *prog;
9335
int err;
9336
9337
if (!obj)
9338
return libbpf_err(-ENOENT);
9339
9340
bpf_object__for_each_program(prog, obj) {
9341
char buf[PATH_MAX];
9342
9343
err = pathname_concat(buf, sizeof(buf), path, prog->name);
9344
if (err)
9345
return libbpf_err(err);
9346
9347
err = bpf_program__unpin(prog, buf);
9348
if (err)
9349
return libbpf_err(err);
9350
}
9351
9352
return 0;
9353
}
9354
9355
int bpf_object__pin(struct bpf_object *obj, const char *path)
9356
{
9357
int err;
9358
9359
err = bpf_object__pin_maps(obj, path);
9360
if (err)
9361
return libbpf_err(err);
9362
9363
err = bpf_object__pin_programs(obj, path);
9364
if (err) {
9365
bpf_object__unpin_maps(obj, path);
9366
return libbpf_err(err);
9367
}
9368
9369
return 0;
9370
}
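/*
* Illustrative usage sketch (not part of libbpf, path is hypothetical):
* bpf_object__pin() pins all auto-created maps and all loaded programs under
* one bpffs directory, and unpins the maps again if pinning programs fails.
*
*	err = bpf_object__load(obj);
*	if (!err)
*		err = bpf_object__pin(obj, "/sys/fs/bpf/myapp");
*	...
*	bpf_object__unpin(obj, "/sys/fs/bpf/myapp");
*/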
9371
9372
int bpf_object__unpin(struct bpf_object *obj, const char *path)
9373
{
9374
int err;
9375
9376
err = bpf_object__unpin_programs(obj, path);
9377
if (err)
9378
return libbpf_err(err);
9379
9380
err = bpf_object__unpin_maps(obj, path);
9381
if (err)
9382
return libbpf_err(err);
9383
9384
return 0;
9385
}
9386
9387
static void bpf_map__destroy(struct bpf_map *map)
9388
{
9389
if (map->inner_map) {
9390
bpf_map__destroy(map->inner_map);
9391
zfree(&map->inner_map);
9392
}
9393
9394
zfree(&map->init_slots);
9395
map->init_slots_sz = 0;
9396
9397
if (map->mmaped && map->mmaped != map->obj->arena_data)
9398
munmap(map->mmaped, bpf_map_mmap_sz(map));
9399
map->mmaped = NULL;
9400
9401
if (map->st_ops) {
9402
zfree(&map->st_ops->data);
9403
zfree(&map->st_ops->progs);
9404
zfree(&map->st_ops->kern_func_off);
9405
zfree(&map->st_ops);
9406
}
9407
9408
zfree(&map->name);
9409
zfree(&map->real_name);
9410
zfree(&map->pin_path);
9411
9412
if (map->fd >= 0)
9413
zclose(map->fd);
9414
}
9415
9416
void bpf_object__close(struct bpf_object *obj)
9417
{
9418
size_t i;
9419
9420
if (IS_ERR_OR_NULL(obj))
9421
return;
9422
9423
/*
9424
* if user called bpf_object__prepare() without ever getting to
9425
* bpf_object__load(), we need to clean up stuff that is normally
9426
* cleaned up at the end of loading step
9427
*/
9428
bpf_object_post_load_cleanup(obj);
9429
9430
usdt_manager_free(obj->usdt_man);
9431
obj->usdt_man = NULL;
9432
9433
bpf_gen__free(obj->gen_loader);
9434
bpf_object__elf_finish(obj);
9435
bpf_object_unload(obj);
9436
btf__free(obj->btf);
9437
btf__free(obj->btf_vmlinux);
9438
btf_ext__free(obj->btf_ext);
9439
9440
for (i = 0; i < obj->nr_maps; i++)
9441
bpf_map__destroy(&obj->maps[i]);
9442
9443
zfree(&obj->btf_custom_path);
9444
zfree(&obj->kconfig);
9445
9446
for (i = 0; i < obj->nr_extern; i++) {
9447
zfree(&obj->externs[i].name);
9448
zfree(&obj->externs[i].essent_name);
9449
}
9450
9451
zfree(&obj->externs);
9452
obj->nr_extern = 0;
9453
9454
zfree(&obj->maps);
9455
obj->nr_maps = 0;
9456
9457
if (obj->programs && obj->nr_programs) {
9458
for (i = 0; i < obj->nr_programs; i++)
9459
bpf_program__exit(&obj->programs[i]);
9460
}
9461
zfree(&obj->programs);
9462
9463
zfree(&obj->feat_cache);
9464
zfree(&obj->token_path);
9465
if (obj->token_fd > 0)
9466
close(obj->token_fd);
9467
9468
zfree(&obj->arena_data);
9469
9470
zfree(&obj->jumptables_data);
9471
obj->jumptables_data_sz = 0;
9472
9473
for (i = 0; i < obj->jumptable_map_cnt; i++)
9474
close(obj->jumptable_maps[i].fd);
9475
zfree(&obj->jumptable_maps);
9476
9477
free(obj);
9478
}
9479
9480
const char *bpf_object__name(const struct bpf_object *obj)
9481
{
9482
return obj ? obj->name : libbpf_err_ptr(-EINVAL);
9483
}
9484
9485
unsigned int bpf_object__kversion(const struct bpf_object *obj)
9486
{
9487
return obj ? obj->kern_version : 0;
9488
}
9489
9490
int bpf_object__token_fd(const struct bpf_object *obj)
9491
{
9492
return obj->token_fd ?: -1;
9493
}
9494
9495
struct btf *bpf_object__btf(const struct bpf_object *obj)
9496
{
9497
return obj ? obj->btf : NULL;
9498
}
9499
9500
int bpf_object__btf_fd(const struct bpf_object *obj)
9501
{
9502
return obj->btf ? btf__fd(obj->btf) : -1;
9503
}
9504
9505
int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
9506
{
9507
if (obj->state >= OBJ_LOADED)
9508
return libbpf_err(-EINVAL);
9509
9510
obj->kern_version = kern_version;
9511
9512
return 0;
9513
}
9514
9515
int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
9516
{
9517
struct bpf_gen *gen;
9518
9519
if (!opts)
9520
return libbpf_err(-EFAULT);
9521
if (!OPTS_VALID(opts, gen_loader_opts))
9522
return libbpf_err(-EINVAL);
9523
gen = calloc(1, sizeof(*gen));
9524
if (!gen)
9525
return libbpf_err(-ENOMEM);
9526
gen->opts = opts;
9527
gen->swapped_endian = !is_native_endianness(obj);
9528
obj->gen_loader = gen;
9529
return 0;
9530
}
9531
9532
static struct bpf_program *
9533
__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
9534
bool forward)
9535
{
9536
size_t nr_programs = obj->nr_programs;
9537
ssize_t idx;
9538
9539
if (!nr_programs)
9540
return NULL;
9541
9542
if (!p)
9543
/* Iter from the beginning */
9544
return forward ? &obj->programs[0] :
9545
&obj->programs[nr_programs - 1];
9546
9547
if (p->obj != obj) {
9548
pr_warn("error: program handler doesn't match object\n");
9549
return errno = EINVAL, NULL;
9550
}
9551
9552
idx = (p - obj->programs) + (forward ? 1 : -1);
9553
if (idx >= obj->nr_programs || idx < 0)
9554
return NULL;
9555
return &obj->programs[idx];
9556
}
9557
9558
struct bpf_program *
9559
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
9560
{
9561
struct bpf_program *prog = prev;
9562
9563
do {
9564
prog = __bpf_program__iter(prog, obj, true);
9565
} while (prog && prog_is_subprog(obj, prog));
9566
9567
return prog;
9568
}
9569
9570
struct bpf_program *
9571
bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
9572
{
9573
struct bpf_program *prog = next;
9574
9575
do {
9576
prog = __bpf_program__iter(prog, obj, false);
9577
} while (prog && prog_is_subprog(obj, prog));
9578
9579
return prog;
9580
}
9581
9582
void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
9583
{
9584
prog->prog_ifindex = ifindex;
9585
}
9586
9587
const char *bpf_program__name(const struct bpf_program *prog)
9588
{
9589
return prog->name;
9590
}
9591
9592
const char *bpf_program__section_name(const struct bpf_program *prog)
9593
{
9594
return prog->sec_name;
9595
}
9596
9597
bool bpf_program__autoload(const struct bpf_program *prog)
9598
{
9599
return prog->autoload;
9600
}
9601
9602
int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
9603
{
9604
if (prog->obj->state >= OBJ_LOADED)
9605
return libbpf_err(-EINVAL);
9606
9607
prog->autoload = autoload;
9608
return 0;
9609
}
9610
9611
bool bpf_program__autoattach(const struct bpf_program *prog)
9612
{
9613
return prog->autoattach;
9614
}
9615
9616
void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
9617
{
9618
prog->autoattach = autoattach;
9619
}
9620
9621
const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
9622
{
9623
return prog->insns;
9624
}
9625
9626
size_t bpf_program__insn_cnt(const struct bpf_program *prog)
9627
{
9628
return prog->insns_cnt;
9629
}
9630
9631
int bpf_program__set_insns(struct bpf_program *prog,
9632
struct bpf_insn *new_insns, size_t new_insn_cnt)
9633
{
9634
struct bpf_insn *insns;
9635
9636
if (prog->obj->state >= OBJ_LOADED)
9637
return libbpf_err(-EBUSY);
9638
9639
insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
9640
/* NULL is a valid return from reallocarray if the new count is zero */
9641
if (!insns && new_insn_cnt) {
9642
pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
9643
return libbpf_err(-ENOMEM);
9644
}
9645
memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
9646
9647
prog->insns = insns;
9648
prog->insns_cnt = new_insn_cnt;
9649
return 0;
9650
}
9651
9652
int bpf_program__fd(const struct bpf_program *prog)
9653
{
9654
if (!prog)
9655
return libbpf_err(-EINVAL);
9656
9657
if (prog->fd < 0)
9658
return libbpf_err(-ENOENT);
9659
9660
return prog->fd;
9661
}
9662
9663
__alias(bpf_program__type)
9664
enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
9665
9666
enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
9667
{
9668
return prog->type;
9669
}
9670
9671
static size_t custom_sec_def_cnt;
9672
static struct bpf_sec_def *custom_sec_defs;
9673
static struct bpf_sec_def custom_fallback_def;
9674
static bool has_custom_fallback_def;
9675
static int last_custom_sec_def_handler_id;
9676
9677
int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
9678
{
9679
if (prog->obj->state >= OBJ_LOADED)
9680
return libbpf_err(-EBUSY);
9681
9682
/* if type is not changed, do nothing */
9683
if (prog->type == type)
9684
return 0;
9685
9686
prog->type = type;
9687
9688
/* If a program type was changed, we need to reset associated SEC()
9689
* handler, as it will be invalid now. The only exception is a generic
9690
* fallback handler, which by definition is program type-agnostic and
9691
* is a catch-all custom handler, optionally set by the application,
9692
* so should be able to handle any type of BPF program.
9693
*/
9694
if (prog->sec_def != &custom_fallback_def)
9695
prog->sec_def = NULL;
9696
return 0;
9697
}
9698
9699
__alias(bpf_program__expected_attach_type)
9700
enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
9701
9702
enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
9703
{
9704
return prog->expected_attach_type;
9705
}
9706
9707
int bpf_program__set_expected_attach_type(struct bpf_program *prog,
9708
enum bpf_attach_type type)
9709
{
9710
if (prog->obj->state >= OBJ_LOADED)
9711
return libbpf_err(-EBUSY);
9712
9713
prog->expected_attach_type = type;
9714
return 0;
9715
}
9716
9717
__u32 bpf_program__flags(const struct bpf_program *prog)
9718
{
9719
return prog->prog_flags;
9720
}
9721
9722
int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
9723
{
9724
if (prog->obj->state >= OBJ_LOADED)
9725
return libbpf_err(-EBUSY);
9726
9727
prog->prog_flags = flags;
9728
return 0;
9729
}
9730
9731
__u32 bpf_program__log_level(const struct bpf_program *prog)
9732
{
9733
return prog->log_level;
9734
}
9735
9736
int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
9737
{
9738
if (prog->obj->state >= OBJ_LOADED)
9739
return libbpf_err(-EBUSY);
9740
9741
prog->log_level = log_level;
9742
return 0;
9743
}
9744
9745
const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
9746
{
9747
*log_size = prog->log_size;
9748
return prog->log_buf;
9749
}
9750
9751
int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
9752
{
9753
if (log_size && !log_buf)
9754
return libbpf_err(-EINVAL);
9755
if (prog->log_size > UINT_MAX)
9756
return libbpf_err(-EINVAL);
9757
if (prog->obj->state >= OBJ_LOADED)
9758
return libbpf_err(-EBUSY);
9759
9760
prog->log_buf = log_buf;
9761
prog->log_size = log_size;
9762
return 0;
9763
}
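/*
* Illustrative usage sketch (not part of libbpf, buffer size and program name
* are hypothetical): route the verifier log of one program into a
* caller-provided buffer before loading the object, so a verification failure
* can be reported with full context.
*
*	static char vlog[1024 * 1024];
*	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "handle_tp");
*
*	bpf_program__set_log_buf(prog, vlog, sizeof(vlog));
*	bpf_program__set_log_level(prog, 1);
*	if (bpf_object__load(obj))
*		fprintf(stderr, "verifier log:\n%s\n", vlog);
*/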
9764
9765
struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog)
9766
{
9767
if (prog->func_info_rec_size != sizeof(struct bpf_func_info))
9768
return libbpf_err_ptr(-EOPNOTSUPP);
9769
return prog->func_info;
9770
}
9771
9772
__u32 bpf_program__func_info_cnt(const struct bpf_program *prog)
9773
{
9774
return prog->func_info_cnt;
9775
}
9776
9777
struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog)
9778
{
9779
if (prog->line_info_rec_size != sizeof(struct bpf_line_info))
9780
return libbpf_err_ptr(-EOPNOTSUPP);
9781
return prog->line_info;
9782
}
9783
9784
__u32 bpf_program__line_info_cnt(const struct bpf_program *prog)
9785
{
9786
return prog->line_info_cnt;
9787
}
9788
9789
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
9790
.sec = (char *)sec_pfx, \
9791
.prog_type = BPF_PROG_TYPE_##ptype, \
9792
.expected_attach_type = atype, \
9793
.cookie = (long)(flags), \
9794
.prog_prepare_load_fn = libbpf_prepare_prog_load, \
9795
__VA_ARGS__ \
9796
}
9797
9798
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9799
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9800
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9801
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9802
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9803
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9804
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9805
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9806
static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9807
static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9808
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9809
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9810
9811
static const struct bpf_sec_def section_defs[] = {
9812
SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE),
9813
SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
9814
SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
9815
SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9816
SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9817
SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9818
SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9819
SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9820
SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9821
SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9822
SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9823
SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session),
9824
SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9825
SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9826
SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi),
9827
SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9828
SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9829
SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi),
9830
SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9831
SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9832
SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt),
9833
SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
9834
SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
9835
SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
9836
SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
9837
SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
9838
SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9839
SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9840
SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9841
SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
9842
SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
9843
SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9844
SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9845
SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9846
SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9847
SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9848
SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9849
SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
9850
SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
9851
SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
9852
SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
9853
SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9854
SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9855
SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9856
SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
9857
SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
9858
SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
9859
SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
9860
SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
9861
SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
9862
SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
9863
SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
9864
SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
9865
SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
9866
SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
9867
SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
9868
SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
9869
SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE),
9870
SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE),
9871
SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE),
9872
SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE),
9873
SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE),
9874
SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
9875
SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
9876
SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
9877
SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT),
9878
SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE),
9879
SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
9880
SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
9881
SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
9882
SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
9883
SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
9884
SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE),
9885
SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
9886
SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
9887
SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
9888
SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
9889
SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
9890
SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
9891
SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
9892
SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
9893
SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
9894
SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
9895
SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
9896
SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
9897
SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
9898
SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
9899
SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
9900
SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
9901
SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
9902
SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
9903
SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
9904
SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
9905
SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
9906
SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
9907
SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
9908
SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
9909
SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
9910
SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
9911
SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
9912
SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
9913
SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
9914
SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
9915
};
9916
9917
int libbpf_register_prog_handler(const char *sec,
9918
enum bpf_prog_type prog_type,
9919
enum bpf_attach_type exp_attach_type,
9920
const struct libbpf_prog_handler_opts *opts)
9921
{
9922
struct bpf_sec_def *sec_def;
9923
9924
if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
9925
return libbpf_err(-EINVAL);
9926
9927
if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
9928
return libbpf_err(-E2BIG);
9929
9930
if (sec) {
9931
sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
9932
sizeof(*sec_def));
9933
if (!sec_def)
9934
return libbpf_err(-ENOMEM);
9935
9936
custom_sec_defs = sec_def;
9937
sec_def = &custom_sec_defs[custom_sec_def_cnt];
9938
} else {
9939
if (has_custom_fallback_def)
9940
return libbpf_err(-EBUSY);
9941
9942
sec_def = &custom_fallback_def;
9943
}
9944
9945
sec_def->sec = sec ? strdup(sec) : NULL;
9946
if (sec && !sec_def->sec)
9947
return libbpf_err(-ENOMEM);
9948
9949
sec_def->prog_type = prog_type;
9950
sec_def->expected_attach_type = exp_attach_type;
9951
sec_def->cookie = OPTS_GET(opts, cookie, 0);
9952
9953
sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
9954
sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
9955
sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
9956
9957
sec_def->handler_id = ++last_custom_sec_def_handler_id;
9958
9959
if (sec)
9960
custom_sec_def_cnt++;
9961
else
9962
has_custom_fallback_def = true;
9963
9964
return sec_def->handler_id;
9965
}
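/*
* Illustrative usage sketch (not part of libbpf, section name and types are
* hypothetical): register a custom SEC() handler so that programs placed in a
* user-defined "mytc" section are loaded as SCHED_CLS/tcx-ingress programs,
* then unregister it once all objects are loaded.
*
*	LIBBPF_OPTS(libbpf_prog_handler_opts, opts, .cookie = 0);
*	int id = libbpf_register_prog_handler("mytc", BPF_PROG_TYPE_SCHED_CLS,
*					      BPF_TCX_INGRESS, &opts);
*
*	... open and load objects that use SEC("mytc") ...
*	libbpf_unregister_prog_handler(id);
*/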
9966
9967
int libbpf_unregister_prog_handler(int handler_id)
9968
{
9969
struct bpf_sec_def *sec_defs;
9970
int i;
9971
9972
if (handler_id <= 0)
9973
return libbpf_err(-EINVAL);
9974
9975
if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
9976
memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
9977
has_custom_fallback_def = false;
9978
return 0;
9979
}
9980
9981
for (i = 0; i < custom_sec_def_cnt; i++) {
9982
if (custom_sec_defs[i].handler_id == handler_id)
9983
break;
9984
}
9985
9986
if (i == custom_sec_def_cnt)
9987
return libbpf_err(-ENOENT);
9988
9989
free(custom_sec_defs[i].sec);
9990
for (i = i + 1; i < custom_sec_def_cnt; i++)
9991
custom_sec_defs[i - 1] = custom_sec_defs[i];
9992
custom_sec_def_cnt--;
9993
9994
/* try to shrink the array, but it's ok if we couldn't */
9995
sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
9996
/* if new count is zero, reallocarray can return a valid NULL result;
9997
* in this case the previous pointer will be freed, so we *have to*
9998
* reassign old pointer to the new value (even if it's NULL)
9999
*/
10000
if (sec_defs || custom_sec_def_cnt == 0)
10001
custom_sec_defs = sec_defs;
10002
10003
return 0;
10004
}
10005
10006
static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
10007
{
10008
size_t len = strlen(sec_def->sec);
10009
10010
/* "type/" always has to have proper SEC("type/extras") form */
10011
if (sec_def->sec[len - 1] == '/') {
10012
if (str_has_pfx(sec_name, sec_def->sec))
10013
return true;
10014
return false;
10015
}
10016
10017
/* "type+" means it can be either exact SEC("type") or
10018
* well-formed SEC("type/extras") with proper '/' separator
10019
*/
10020
if (sec_def->sec[len - 1] == '+') {
10021
len--;
10022
/* not even a prefix */
10023
if (strncmp(sec_name, sec_def->sec, len) != 0)
10024
return false;
10025
/* exact match or has '/' separator */
10026
if (sec_name[len] == '\0' || sec_name[len] == '/')
10027
return true;
10028
return false;
10029
}
10030
10031
return strcmp(sec_name, sec_def->sec) == 0;
10032
}
10033
10034
static const struct bpf_sec_def *find_sec_def(const char *sec_name)
10035
{
10036
const struct bpf_sec_def *sec_def;
10037
int i, n;
10038
10039
n = custom_sec_def_cnt;
10040
for (i = 0; i < n; i++) {
10041
sec_def = &custom_sec_defs[i];
10042
if (sec_def_matches(sec_def, sec_name))
10043
return sec_def;
10044
}
10045
10046
n = ARRAY_SIZE(section_defs);
10047
for (i = 0; i < n; i++) {
10048
sec_def = &section_defs[i];
10049
if (sec_def_matches(sec_def, sec_name))
10050
return sec_def;
10051
}
10052
10053
if (has_custom_fallback_def)
10054
return &custom_fallback_def;
10055
10056
return NULL;
10057
}
10058
10059
#define MAX_TYPE_NAME_SIZE 32
10060
10061
static char *libbpf_get_type_names(bool attach_type)
10062
{
10063
int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
10064
char *buf;
10065
10066
buf = malloc(len);
10067
if (!buf)
10068
return NULL;
10069
10070
buf[0] = '\0';
10071
/* Forge string buf with all available names */
10072
for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
10073
const struct bpf_sec_def *sec_def = &section_defs[i];
10074
10075
if (attach_type) {
10076
if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
10077
continue;
10078
10079
if (!(sec_def->cookie & SEC_ATTACHABLE))
10080
continue;
10081
}
10082
10083
if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
10084
free(buf);
10085
return NULL;
10086
}
10087
strcat(buf, " ");
10088
strcat(buf, section_defs[i].sec);
10089
}
10090
10091
return buf;
10092
}
10093
10094
int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
10095
enum bpf_attach_type *expected_attach_type)
10096
{
10097
const struct bpf_sec_def *sec_def;
10098
char *type_names;
10099
10100
if (!name)
10101
return libbpf_err(-EINVAL);
10102
10103
sec_def = find_sec_def(name);
10104
if (sec_def) {
10105
*prog_type = sec_def->prog_type;
10106
*expected_attach_type = sec_def->expected_attach_type;
10107
return 0;
10108
}
10109
10110
pr_debug("failed to guess program type from ELF section '%s'\n", name);
10111
type_names = libbpf_get_type_names(false);
10112
if (type_names != NULL) {
10113
pr_debug("supported section(type) names are:%s\n", type_names);
10114
free(type_names);
10115
}
10116
10117
return libbpf_err(-ESRCH);
10118
}
10119
10120
const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
10121
{
10122
if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
10123
return NULL;
10124
10125
return attach_type_name[t];
10126
}
10127
10128
const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
10129
{
10130
if (t < 0 || t >= ARRAY_SIZE(link_type_name))
10131
return NULL;
10132
10133
return link_type_name[t];
10134
}
10135
10136
const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
10137
{
10138
if (t < 0 || t >= ARRAY_SIZE(map_type_name))
10139
return NULL;
10140
10141
return map_type_name[t];
10142
}
10143
10144
const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
10145
{
10146
if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
10147
return NULL;
10148
10149
return prog_type_name[t];
10150
}
10151
10152
static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
10153
int sec_idx,
10154
size_t offset)
10155
{
10156
struct bpf_map *map;
10157
size_t i;
10158
10159
for (i = 0; i < obj->nr_maps; i++) {
10160
map = &obj->maps[i];
10161
if (!bpf_map__is_struct_ops(map))
10162
continue;
10163
if (map->sec_idx == sec_idx &&
10164
map->sec_offset <= offset &&
10165
offset - map->sec_offset < map->def.value_size)
10166
return map;
10167
}
10168
10169
return NULL;
10170
}
10171
10172
/* Collect the reloc from ELF, populate the st_ops->progs[], and update
10173
* st_ops->data for shadow type.
10174
*/
10175
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
10176
Elf64_Shdr *shdr, Elf_Data *data)
10177
{
10178
const struct btf_type *type;
10179
const struct btf_member *member;
10180
struct bpf_struct_ops *st_ops;
10181
struct bpf_program *prog;
10182
unsigned int shdr_idx;
10183
const struct btf *btf;
10184
struct bpf_map *map;
10185
unsigned int moff, insn_idx;
10186
const char *name;
10187
__u32 member_idx;
10188
Elf64_Sym *sym;
10189
Elf64_Rel *rel;
10190
int i, nrels;
10191
10192
btf = obj->btf;
10193
nrels = shdr->sh_size / shdr->sh_entsize;
10194
for (i = 0; i < nrels; i++) {
10195
rel = elf_rel_by_idx(data, i);
10196
if (!rel) {
10197
pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
10198
return -LIBBPF_ERRNO__FORMAT;
10199
}
10200
10201
sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
10202
if (!sym) {
10203
pr_warn("struct_ops reloc: symbol %zx not found\n",
10204
(size_t)ELF64_R_SYM(rel->r_info));
10205
return -LIBBPF_ERRNO__FORMAT;
10206
}
10207
10208
name = elf_sym_str(obj, sym->st_name) ?: "<?>";
10209
map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
10210
if (!map) {
10211
pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
10212
(size_t)rel->r_offset);
10213
return -EINVAL;
10214
}
10215
10216
moff = rel->r_offset - map->sec_offset;
10217
shdr_idx = sym->st_shndx;
10218
st_ops = map->st_ops;
10219
pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
10220
map->name,
10221
(long long)(rel->r_info >> 32),
10222
(long long)sym->st_value,
10223
shdr_idx, (size_t)rel->r_offset,
10224
map->sec_offset, sym->st_name, name);
10225
10226
if (shdr_idx >= SHN_LORESERVE) {
10227
pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
10228
map->name, (size_t)rel->r_offset, shdr_idx);
10229
return -LIBBPF_ERRNO__RELOC;
10230
}
10231
if (sym->st_value % BPF_INSN_SZ) {
10232
pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
10233
map->name, (unsigned long long)sym->st_value);
10234
return -LIBBPF_ERRNO__FORMAT;
10235
}
10236
insn_idx = sym->st_value / BPF_INSN_SZ;
10237
10238
type = btf__type_by_id(btf, st_ops->type_id);
10239
member = find_member_by_offset(type, moff * 8);
10240
if (!member) {
10241
pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
10242
map->name, moff);
10243
return -EINVAL;
10244
}
10245
member_idx = member - btf_members(type);
10246
name = btf__name_by_offset(btf, member->name_off);
10247
10248
if (!resolve_func_ptr(btf, member->type, NULL)) {
10249
pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
10250
map->name, name);
10251
return -EINVAL;
10252
}
10253
10254
prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
10255
if (!prog) {
10256
pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
10257
map->name, shdr_idx, name);
10258
return -EINVAL;
10259
}
10260
10261
/* prevent the use of BPF prog with invalid type */
10262
if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
10263
pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
10264
map->name, prog->name);
10265
return -EINVAL;
10266
}
10267
10268
st_ops->progs[member_idx] = prog;
10269
10270
/* st_ops->data will be exposed to users, being returned by
10271
* bpf_map__initial_value() as a pointer to the shadow
10272
* type. All function pointers in the original struct type
10273
* should be converted to a pointer to struct bpf_program
10274
* in the shadow type.
10275
*/
10276
*((struct bpf_program **)(st_ops->data + moff)) = prog;
10277
}
10278
10279
return 0;
10280
}
10281
10282
#define BTF_TRACE_PREFIX "btf_trace_"
10283
#define BTF_LSM_PREFIX "bpf_lsm_"
10284
#define BTF_ITER_PREFIX "bpf_iter_"
10285
#define BTF_MAX_NAME_SIZE 128
10286
10287
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
10288
const char **prefix, int *kind)
10289
{
10290
switch (attach_type) {
10291
case BPF_TRACE_RAW_TP:
10292
*prefix = BTF_TRACE_PREFIX;
10293
*kind = BTF_KIND_TYPEDEF;
10294
break;
10295
case BPF_LSM_MAC:
10296
case BPF_LSM_CGROUP:
10297
*prefix = BTF_LSM_PREFIX;
10298
*kind = BTF_KIND_FUNC;
10299
break;
10300
case BPF_TRACE_ITER:
10301
*prefix = BTF_ITER_PREFIX;
10302
*kind = BTF_KIND_FUNC;
10303
break;
10304
default:
10305
*prefix = "";
10306
*kind = BTF_KIND_FUNC;
10307
}
10308
}
10309
10310
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
10311
const char *name, __u32 kind)
10312
{
10313
char btf_type_name[BTF_MAX_NAME_SIZE];
10314
int ret;
10315
10316
ret = snprintf(btf_type_name, sizeof(btf_type_name),
10317
"%s%s", prefix, name);
10318
/* snprintf returns the number of characters that would have been written,
10319
* excluding the terminating null. So, a result >= BTF_MAX_NAME_SIZE
10320
* indicates truncation.
10321
*/
10322
if (ret < 0 || ret >= sizeof(btf_type_name))
10323
return -ENAMETOOLONG;
10324
return btf__find_by_name_kind(btf, btf_type_name, kind);
10325
}
10326
10327
static inline int find_attach_btf_id(struct btf *btf, const char *name,
10328
enum bpf_attach_type attach_type)
10329
{
10330
const char *prefix;
10331
int kind;
10332
10333
btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
10334
return find_btf_by_prefix_kind(btf, prefix, name, kind);
10335
}
10336
10337
int libbpf_find_vmlinux_btf_id(const char *name,
10338
enum bpf_attach_type attach_type)
10339
{
10340
struct btf *btf;
10341
int err;
10342
10343
btf = btf__load_vmlinux_btf();
10344
err = libbpf_get_error(btf);
10345
if (err) {
10346
pr_warn("vmlinux BTF is not found\n");
10347
return libbpf_err(err);
10348
}
10349
10350
err = find_attach_btf_id(btf, name, attach_type);
10351
if (err <= 0)
10352
pr_warn("%s is not found in vmlinux BTF\n", name);
10353
10354
btf__free(btf);
10355
return libbpf_err(err);
10356
}
10357
10358
static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd)
10359
{
10360
struct bpf_prog_info info;
10361
__u32 info_len = sizeof(info);
10362
struct btf *btf;
10363
int err;
10364
10365
memset(&info, 0, info_len);
10366
err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
10367
if (err) {
10368
pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n",
10369
attach_prog_fd, errstr(err));
10370
return err;
10371
}
10372
10373
err = -EINVAL;
10374
if (!info.btf_id) {
10375
pr_warn("The target program doesn't have BTF\n");
10376
goto out;
10377
}
10378
btf = btf_load_from_kernel(info.btf_id, NULL, token_fd);
10379
err = libbpf_get_error(btf);
10380
if (err) {
10381
pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err));
10382
goto out;
10383
}
10384
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
10385
btf__free(btf);
10386
if (err <= 0) {
10387
pr_warn("%s is not found in prog's BTF\n", name);
10388
goto out;
10389
}
10390
out:
10391
return err;
10392
}
10393
10394
static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
10395
enum bpf_attach_type attach_type,
10396
int *btf_obj_fd, int *btf_type_id)
10397
{
10398
int ret, i, mod_len = 0;
10399
const char *fn_name, *mod_name = NULL;
10400
10401
fn_name = strchr(attach_name, ':');
10402
if (fn_name) {
10403
mod_name = attach_name;
10404
mod_len = fn_name - mod_name;
10405
fn_name++;
10406
}
10407
10408
if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) {
10409
ret = find_attach_btf_id(obj->btf_vmlinux,
10410
mod_name ? fn_name : attach_name,
10411
attach_type);
10412
if (ret > 0) {
10413
*btf_obj_fd = 0; /* vmlinux BTF */
10414
*btf_type_id = ret;
10415
return 0;
10416
}
10417
if (ret != -ENOENT)
10418
return ret;
10419
}
10420
10421
ret = load_module_btfs(obj);
10422
if (ret)
10423
return ret;
10424
10425
for (i = 0; i < obj->btf_module_cnt; i++) {
10426
const struct module_btf *mod = &obj->btf_modules[i];
10427
10428
if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0)
10429
continue;
10430
10431
ret = find_attach_btf_id(mod->btf,
10432
mod_name ? fn_name : attach_name,
10433
attach_type);
10434
if (ret > 0) {
10435
*btf_obj_fd = mod->fd;
10436
*btf_type_id = ret;
10437
return 0;
10438
}
10439
if (ret == -ENOENT)
10440
continue;
10441
10442
return ret;
10443
}
10444
10445
return -ESRCH;
10446
}
10447
10448
static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
10449
int *btf_obj_fd, int *btf_type_id)
10450
{
10451
enum bpf_attach_type attach_type = prog->expected_attach_type;
10452
__u32 attach_prog_fd = prog->attach_prog_fd;
10453
int err = 0;
10454
10455
/* BPF program's BTF ID */
10456
if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
10457
if (!attach_prog_fd) {
10458
pr_warn("prog '%s': attach program FD is not set\n", prog->name);
10459
return -EINVAL;
10460
}
10461
err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd);
10462
if (err < 0) {
10463
pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n",
10464
prog->name, attach_prog_fd, attach_name, errstr(err));
10465
return err;
10466
}
10467
*btf_obj_fd = 0;
10468
*btf_type_id = err;
10469
return 0;
10470
}
10471
10472
/* kernel/module BTF ID */
10473
if (prog->obj->gen_loader) {
10474
bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
10475
*btf_obj_fd = 0;
10476
*btf_type_id = 1;
10477
} else {
10478
err = find_kernel_btf_id(prog->obj, attach_name,
10479
attach_type, btf_obj_fd,
10480
btf_type_id);
10481
}
10482
if (err) {
10483
pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n",
10484
prog->name, attach_name, errstr(err));
10485
return err;
10486
}
10487
return 0;
10488
}
10489
10490
int libbpf_attach_type_by_name(const char *name,
10491
enum bpf_attach_type *attach_type)
10492
{
10493
char *type_names;
10494
const struct bpf_sec_def *sec_def;
10495
10496
if (!name)
10497
return libbpf_err(-EINVAL);
10498
10499
sec_def = find_sec_def(name);
10500
if (!sec_def) {
10501
pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
10502
type_names = libbpf_get_type_names(true);
10503
if (type_names != NULL) {
10504
pr_debug("attachable section(type) names are:%s\n", type_names);
10505
free(type_names);
10506
}
10507
10508
return libbpf_err(-EINVAL);
10509
}
10510
10511
if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
10512
return libbpf_err(-EINVAL);
10513
if (!(sec_def->cookie & SEC_ATTACHABLE))
10514
return libbpf_err(-EINVAL);
10515
10516
*attach_type = sec_def->expected_attach_type;
10517
return 0;
10518
}
10519
10520
int bpf_map__fd(const struct bpf_map *map)
10521
{
10522
if (!map)
10523
return libbpf_err(-EINVAL);
10524
if (!map_is_created(map))
10525
return -1;
10526
return map->fd;
10527
}
10528
10529
static bool map_uses_real_name(const struct bpf_map *map)
10530
{
10531
/* Since libbpf started to support custom .data.* and .rodata.* maps,
10532
* their user-visible name differs from kernel-visible name. Users see
10533
* such map's corresponding ELF section name as a map name.
10534
* This check distinguishes .data/.rodata from .data.* and .rodata.*
10535
* maps to know which name has to be returned to the user.
10536
*/
10537
if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
10538
return true;
10539
if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
10540
return true;
10541
return false;
10542
}
10543
10544
const char *bpf_map__name(const struct bpf_map *map)
10545
{
10546
if (!map)
10547
return NULL;
10548
10549
if (map_uses_real_name(map))
10550
return map->real_name;
10551
10552
return map->name;
10553
}
10554
10555
enum bpf_map_type bpf_map__type(const struct bpf_map *map)
10556
{
10557
return map->def.type;
10558
}
10559
10560
int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
10561
{
10562
if (map_is_created(map))
10563
return libbpf_err(-EBUSY);
10564
map->def.type = type;
10565
return 0;
10566
}
10567
10568
__u32 bpf_map__map_flags(const struct bpf_map *map)
10569
{
10570
return map->def.map_flags;
10571
}
10572
10573
int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
10574
{
10575
if (map_is_created(map))
10576
return libbpf_err(-EBUSY);
10577
map->def.map_flags = flags;
10578
return 0;
10579
}
10580
10581
__u64 bpf_map__map_extra(const struct bpf_map *map)
10582
{
10583
return map->map_extra;
10584
}
10585
10586
int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
10587
{
10588
if (map_is_created(map))
10589
return libbpf_err(-EBUSY);
10590
map->map_extra = map_extra;
10591
return 0;
10592
}
10593
10594
__u32 bpf_map__numa_node(const struct bpf_map *map)
10595
{
10596
return map->numa_node;
10597
}
10598
10599
int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
10600
{
10601
if (map_is_created(map))
10602
return libbpf_err(-EBUSY);
10603
map->numa_node = numa_node;
10604
return 0;
10605
}
10606
10607
__u32 bpf_map__key_size(const struct bpf_map *map)
10608
{
10609
return map->def.key_size;
10610
}
10611
10612
int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
10613
{
10614
if (map_is_created(map))
10615
return libbpf_err(-EBUSY);
10616
map->def.key_size = size;
10617
return 0;
10618
}
10619
10620
__u32 bpf_map__value_size(const struct bpf_map *map)
10621
{
10622
return map->def.value_size;
10623
}
10624
10625
static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
10626
{
10627
struct btf *btf;
10628
struct btf_type *datasec_type, *var_type;
10629
struct btf_var_secinfo *var;
10630
const struct btf_type *array_type;
10631
const struct btf_array *array;
10632
int vlen, element_sz, new_array_id;
10633
__u32 nr_elements;
10634
10635
/* check btf existence */
10636
btf = bpf_object__btf(map->obj);
10637
if (!btf)
10638
return -ENOENT;
10639
10640
/* verify map is datasec */
10641
datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
10642
if (!btf_is_datasec(datasec_type)) {
10643
pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
10644
bpf_map__name(map));
10645
return -EINVAL;
10646
}
10647
10648
/* verify datasec has at least one var */
10649
vlen = btf_vlen(datasec_type);
10650
if (vlen == 0) {
10651
pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
10652
bpf_map__name(map));
10653
return -EINVAL;
10654
}
10655
10656
/* verify last var in the datasec is an array */
10657
var = &btf_var_secinfos(datasec_type)[vlen - 1];
10658
var_type = btf_type_by_id(btf, var->type);
10659
array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
10660
if (!btf_is_array(array_type)) {
10661
pr_warn("map '%s': cannot be resized, last var must be an array\n",
10662
bpf_map__name(map));
10663
return -EINVAL;
10664
}
10665
10666
/* verify request size aligns with array */
10667
array = btf_array(array_type);
10668
element_sz = btf__resolve_size(btf, array->type);
10669
if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
10670
pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
10671
bpf_map__name(map), element_sz, size);
10672
return -EINVAL;
10673
}
10674
10675
/* create a new array based on the existing array, but with new length */
10676
nr_elements = (size - var->offset) / element_sz;
10677
new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
10678
if (new_array_id < 0)
10679
return new_array_id;
10680
10681
/* adding a new btf type invalidates existing pointers to btf objects,
10682
* so refresh pointers before proceeding
10683
*/
10684
datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
10685
var = &btf_var_secinfos(datasec_type)[vlen - 1];
10686
var_type = btf_type_by_id(btf, var->type);
10687
10688
/* finally update btf info */
10689
datasec_type->size = size;
10690
var->size = size - var->offset;
10691
var_type->type = new_array_id;
10692
10693
return 0;
10694
}
10695
10696
int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
10697
{
10698
if (map_is_created(map))
10699
return libbpf_err(-EBUSY);
10700
10701
if (map->mmaped) {
10702
size_t mmap_old_sz, mmap_new_sz;
10703
int err;
10704
10705
if (map->def.type != BPF_MAP_TYPE_ARRAY)
10706
return libbpf_err(-EOPNOTSUPP);
10707
10708
mmap_old_sz = bpf_map_mmap_sz(map);
10709
mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
10710
err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
10711
if (err) {
10712
pr_warn("map '%s': failed to resize memory-mapped region: %s\n",
10713
bpf_map__name(map), errstr(err));
10714
return libbpf_err(err);
10715
}
10716
err = map_btf_datasec_resize(map, size);
10717
if (err && err != -ENOENT) {
10718
pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n",
10719
bpf_map__name(map), errstr(err));
10720
map->btf_value_type_id = 0;
10721
map->btf_key_type_id = 0;
10722
}
10723
}
10724
10725
map->def.value_size = size;
10726
return 0;
10727
}
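/*
* Illustrative usage sketch (not part of libbpf, map name and size are
* hypothetical, and the datasec is assumed to end in an array variable): grow
* a memory-mapped global-data map before load; as described above, the
* mmap-ed region and the trailing BTF datasec array are resized together.
*
*	struct bpf_map *data = bpf_object__find_map_by_name(obj, ".bss");
*	int err = bpf_map__set_value_size(data, 64 * 1024);
*
*	if (!err)
*		err = bpf_object__load(obj);
*/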
10728
10729
__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
10730
{
10731
return map ? map->btf_key_type_id : 0;
10732
}
10733
10734
__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
10735
{
10736
return map ? map->btf_value_type_id : 0;
10737
}
10738
10739
int bpf_map__set_initial_value(struct bpf_map *map,
10740
const void *data, size_t size)
10741
{
10742
size_t actual_sz;
10743
10744
if (map_is_created(map))
10745
return libbpf_err(-EBUSY);
10746
10747
if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
10748
return libbpf_err(-EINVAL);
10749
10750
if (map->def.type == BPF_MAP_TYPE_ARENA)
10751
actual_sz = map->obj->arena_data_sz;
10752
else
10753
actual_sz = map->def.value_size;
10754
if (size != actual_sz)
10755
return libbpf_err(-EINVAL);
10756
10757
memcpy(map->mmaped, data, size);
10758
return 0;
10759
}
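/*
* Illustrative usage sketch (not part of libbpf, names are hypothetical, and
* .rodata is assumed to contain only this one variable so the buffer size
* matches the map's value size exactly): override the initial contents of a
* memory-mapped global-data map before the object is loaded.
*
*	struct { __u32 debug_level; } init = { .debug_level = 2 };
*	struct bpf_map *ro = bpf_object__find_map_by_name(obj, ".rodata");
*
*	err = bpf_map__set_initial_value(ro, &init, sizeof(init));
*/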
10760
10761
void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
10762
{
10763
if (bpf_map__is_struct_ops(map)) {
10764
if (psize)
10765
*psize = map->def.value_size;
10766
return map->st_ops->data;
10767
}
10768
10769
if (!map->mmaped)
10770
return NULL;
10771
10772
if (map->def.type == BPF_MAP_TYPE_ARENA)
10773
*psize = map->obj->arena_data_sz;
10774
else
10775
*psize = map->def.value_size;
10776
10777
return map->mmaped;
10778
}
10779
10780
bool bpf_map__is_internal(const struct bpf_map *map)
10781
{
10782
return map->libbpf_type != LIBBPF_MAP_UNSPEC;
10783
}
10784
10785
__u32 bpf_map__ifindex(const struct bpf_map *map)
10786
{
10787
return map->map_ifindex;
10788
}
10789
10790
int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
10791
{
10792
if (map_is_created(map))
10793
return libbpf_err(-EBUSY);
10794
map->map_ifindex = ifindex;
10795
return 0;
10796
}
10797
10798
int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
10799
{
10800
if (!bpf_map_type__is_map_in_map(map->def.type)) {
10801
pr_warn("error: unsupported map type\n");
10802
return libbpf_err(-EINVAL);
10803
}
10804
if (map->inner_map_fd != -1) {
10805
pr_warn("error: inner_map_fd already specified\n");
10806
return libbpf_err(-EINVAL);
10807
}
10808
if (map->inner_map) {
10809
bpf_map__destroy(map->inner_map);
10810
zfree(&map->inner_map);
10811
}
10812
map->inner_map_fd = fd;
10813
return 0;
10814
}
10815
10816
int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog)
10817
{
10818
if (map_is_created(map)) {
10819
pr_warn("exclusive programs must be set before map creation\n");
10820
return libbpf_err(-EINVAL);
10821
}
10822
10823
if (map->obj != prog->obj) {
10824
pr_warn("excl_prog and map must be from the same bpf object\n");
10825
return libbpf_err(-EINVAL);
10826
}
10827
10828
map->excl_prog = prog;
10829
return 0;
10830
}
10831
10832
struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map)
10833
{
10834
return map->excl_prog;
10835
}
10836
10837
static struct bpf_map *
10838
__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
10839
{
10840
ssize_t idx;
10841
struct bpf_map *s, *e;
10842
10843
if (!obj || !obj->maps)
10844
return errno = EINVAL, NULL;
10845
10846
s = obj->maps;
10847
e = obj->maps + obj->nr_maps;
10848
10849
if ((m < s) || (m >= e)) {
10850
pr_warn("error in %s: map handler doesn't belong to object\n",
10851
__func__);
10852
return errno = EINVAL, NULL;
10853
}
10854
10855
idx = (m - obj->maps) + i;
10856
if (idx >= obj->nr_maps || idx < 0)
10857
return NULL;
10858
return &obj->maps[idx];
10859
}
10860
10861
struct bpf_map *
10862
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
10863
{
10864
if (prev == NULL && obj != NULL)
10865
return obj->maps;
10866
10867
return __bpf_map__iter(prev, obj, 1);
10868
}
10869
10870
struct bpf_map *
10871
bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
10872
{
10873
if (next == NULL && obj != NULL) {
10874
if (!obj->nr_maps)
10875
return NULL;
10876
return obj->maps + obj->nr_maps - 1;
10877
}
10878
10879
return __bpf_map__iter(next, obj, -1);
10880
}
10881
10882
struct bpf_map *
10883
bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
10884
{
10885
struct bpf_map *pos;
10886
10887
bpf_object__for_each_map(pos, obj) {
10888
/* if it's a special internal map name (which always starts
10889
* with dot) then check if that special name matches the
10890
* real map name (ELF section name)
10891
*/
10892
if (name[0] == '.') {
10893
if (pos->real_name && strcmp(pos->real_name, name) == 0)
10894
return pos;
10895
continue;
10896
}
10897
/* otherwise map name has to be an exact match */
10898
if (map_uses_real_name(pos)) {
10899
if (strcmp(pos->real_name, name) == 0)
10900
return pos;
10901
continue;
10902
}
10903
if (strcmp(pos->name, name) == 0)
10904
return pos;
10905
}
10906
return errno = ENOENT, NULL;
10907
}
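/*
* Illustrative usage sketch (not part of libbpf, names are hypothetical):
* per the naming rules above, internal global-data maps are looked up by
* their ELF section name, and custom .data.* / .rodata.* maps by their full
* section name.
*
*	struct bpf_map *bss = bpf_object__find_map_by_name(obj, ".bss");
*	struct bpf_map *cfg = bpf_object__find_map_by_name(obj, ".rodata.config");
*/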
10908
10909
int
10910
bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
10911
{
10912
return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
10913
}
10914
10915
static int validate_map_op(const struct bpf_map *map, size_t key_sz,
10916
size_t value_sz, bool check_value_sz)
10917
{
10918
if (!map_is_created(map)) /* map is not yet created */
10919
return -ENOENT;
10920
10921
if (map->def.key_size != key_sz) {
10922
pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
10923
map->name, key_sz, map->def.key_size);
10924
return -EINVAL;
10925
}
10926
10927
if (map->fd < 0) {
10928
pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
10929
return -EINVAL;
10930
}
10931
10932
if (!check_value_sz)
10933
return 0;
10934
10935
switch (map->def.type) {
10936
case BPF_MAP_TYPE_PERCPU_ARRAY:
10937
case BPF_MAP_TYPE_PERCPU_HASH:
10938
case BPF_MAP_TYPE_LRU_PERCPU_HASH:
10939
case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
10940
int num_cpu = libbpf_num_possible_cpus();
10941
size_t elem_sz = roundup(map->def.value_size, 8);
10942
10943
if (value_sz != num_cpu * elem_sz) {
10944
pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
10945
map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
10946
return -EINVAL;
10947
}
10948
break;
10949
}
10950
default:
10951
if (map->def.value_size != value_sz) {
10952
pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
10953
map->name, value_sz, map->def.value_size);
10954
return -EINVAL;
10955
}
10956
break;
10957
}
10958
return 0;
10959
}
10960
10961
int bpf_map__lookup_elem(const struct bpf_map *map,
10962
const void *key, size_t key_sz,
10963
void *value, size_t value_sz, __u64 flags)
10964
{
10965
int err;
10966
10967
err = validate_map_op(map, key_sz, value_sz, true);
10968
if (err)
10969
return libbpf_err(err);
10970
10971
return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
10972
}
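/*
* Illustrative usage sketch (not part of libbpf, map name is hypothetical):
* for per-CPU maps the value buffer must hold one 8-byte-aligned value per
* possible CPU, i.e. libbpf_num_possible_cpus() * roundup(value_size, 8)
* bytes, as enforced by validate_map_op() above.
*
*	struct bpf_map *map = bpf_object__find_map_by_name(obj, "percpu_counts");
*	int ncpus = libbpf_num_possible_cpus();
*	__u64 *vals = calloc(ncpus, sizeof(__u64));
*	__u32 key = 0;
*
*	err = bpf_map__lookup_elem(map, &key, sizeof(key),
*				   vals, ncpus * sizeof(__u64), 0);
*/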
10973
10974
int bpf_map__update_elem(const struct bpf_map *map,
10975
const void *key, size_t key_sz,
10976
const void *value, size_t value_sz, __u64 flags)
10977
{
10978
int err;
10979
10980
err = validate_map_op(map, key_sz, value_sz, true);
10981
if (err)
10982
return libbpf_err(err);
10983
10984
return bpf_map_update_elem(map->fd, key, value, flags);
10985
}
10986
10987
int bpf_map__delete_elem(const struct bpf_map *map,
10988
const void *key, size_t key_sz, __u64 flags)
10989
{
10990
int err;
10991
10992
err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10993
if (err)
10994
return libbpf_err(err);
10995
10996
return bpf_map_delete_elem_flags(map->fd, key, flags);
10997
}
10998
10999
int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
11000
const void *key, size_t key_sz,
11001
void *value, size_t value_sz, __u64 flags)
11002
{
11003
int err;
11004
11005
err = validate_map_op(map, key_sz, value_sz, true);
11006
if (err)
11007
return libbpf_err(err);
11008
11009
return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
11010
}
11011
11012
int bpf_map__get_next_key(const struct bpf_map *map,
11013
const void *cur_key, void *next_key, size_t key_sz)
11014
{
11015
int err;
11016
11017
err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
11018
if (err)
11019
return libbpf_err(err);
11020
11021
return bpf_map_get_next_key(map->fd, cur_key, next_key);
11022
}
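
/* Usage sketch (illustrative only; assumes a created map with __u32 keys):
 * iterating over all keys with bpf_map__get_next_key(), starting from a
 * NULL "current" key; -ENOENT signals the end of iteration:
 *
 *	__u32 cur, next;
 *	int err;
 *
 *	err = bpf_map__get_next_key(map, NULL, &next, sizeof(next));
 *	while (!err) {
 *		cur = next;
 *		... use cur ...
 *		err = bpf_map__get_next_key(map, &cur, &next, sizeof(next));
 *	}
 */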
11023
11024
long libbpf_get_error(const void *ptr)
11025
{
11026
if (!IS_ERR_OR_NULL(ptr))
11027
return 0;
11028
11029
if (IS_ERR(ptr))
11030
errno = -PTR_ERR(ptr);
11031
11032
/* If ptr == NULL, then errno should already be set by the failing
 * API, because libbpf never returns NULL on success and it now always
 * sets errno on error. So there is no extra errno handling for the
 * ptr == NULL case.
 */
11037
return -errno;
11038
}
11039
11040
/* Replace link's underlying BPF program with the new one */
11041
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
11042
{
11043
int ret;
11044
int prog_fd = bpf_program__fd(prog);
11045
11046
if (prog_fd < 0) {
11047
pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n",
11048
prog->name);
11049
return libbpf_err(-EINVAL);
11050
}
11051
11052
ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL);
11053
return libbpf_err_errno(ret);
11054
}
11055
11056
/* Release "ownership" of underlying BPF resource (typically, BPF program
11057
* attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
11058
* link, when destructed through bpf_link__destroy() call won't attempt to
11059
* detach/unregisted that BPF resource. This is useful in situations where,
11060
* say, attached BPF program has to outlive userspace program that attached it
11061
* in the system. Depending on type of BPF program, though, there might be
11062
* additional steps (like pinning BPF program in BPF FS) necessary to ensure
11063
* exit of userspace program doesn't trigger automatic detachment and clean up
11064
* inside the kernel.
11065
*/
11066
void bpf_link__disconnect(struct bpf_link *link)
11067
{
11068
link->disconnected = true;
11069
}
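
/* Usage sketch (illustrative only; the pin path is an example): keeping a
 * BPF program attached after the loading process exits by pinning the link
 * in BPF FS, disconnecting it, and then freeing only the in-memory handle
 * (bpf_link__destroy() on a disconnected link does not detach anything):
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *
 *	err = bpf_link__pin(link, "/sys/fs/bpf/my_link");
 *	bpf_link__disconnect(link);
 *	bpf_link__destroy(link);
 */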
11070
11071
int bpf_link__destroy(struct bpf_link *link)
11072
{
11073
int err = 0;
11074
11075
if (IS_ERR_OR_NULL(link))
11076
return 0;
11077
11078
if (!link->disconnected && link->detach)
11079
err = link->detach(link);
11080
if (link->pin_path)
11081
free(link->pin_path);
11082
if (link->dealloc)
11083
link->dealloc(link);
11084
else
11085
free(link);
11086
11087
return libbpf_err(err);
11088
}
11089
11090
int bpf_link__fd(const struct bpf_link *link)
11091
{
11092
return link->fd;
11093
}
11094
11095
const char *bpf_link__pin_path(const struct bpf_link *link)
11096
{
11097
return link->pin_path;
11098
}
11099
11100
static int bpf_link__detach_fd(struct bpf_link *link)
11101
{
11102
return libbpf_err_errno(close(link->fd));
11103
}
11104
11105
struct bpf_link *bpf_link__open(const char *path)
11106
{
11107
struct bpf_link *link;
11108
int fd;
11109
11110
fd = bpf_obj_get(path);
11111
if (fd < 0) {
11112
fd = -errno;
11113
pr_warn("failed to open link at %s: %d\n", path, fd);
11114
return libbpf_err_ptr(fd);
11115
}
11116
11117
link = calloc(1, sizeof(*link));
11118
if (!link) {
11119
close(fd);
11120
return libbpf_err_ptr(-ENOMEM);
11121
}
11122
link->detach = &bpf_link__detach_fd;
11123
link->fd = fd;
11124
11125
link->pin_path = strdup(path);
11126
if (!link->pin_path) {
11127
bpf_link__destroy(link);
11128
return libbpf_err_ptr(-ENOMEM);
11129
}
11130
11131
return link;
11132
}
11133
11134
int bpf_link__detach(struct bpf_link *link)
11135
{
11136
return bpf_link_detach(link->fd) ? -errno : 0;
11137
}
11138
11139
int bpf_link__pin(struct bpf_link *link, const char *path)
11140
{
11141
int err;
11142
11143
if (link->pin_path)
11144
return libbpf_err(-EBUSY);
11145
err = make_parent_dir(path);
11146
if (err)
11147
return libbpf_err(err);
11148
err = check_path(path);
11149
if (err)
11150
return libbpf_err(err);
11151
11152
link->pin_path = strdup(path);
11153
if (!link->pin_path)
11154
return libbpf_err(-ENOMEM);
11155
11156
if (bpf_obj_pin(link->fd, link->pin_path)) {
11157
err = -errno;
11158
zfree(&link->pin_path);
11159
return libbpf_err(err);
11160
}
11161
11162
pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
11163
return 0;
11164
}
11165
11166
int bpf_link__unpin(struct bpf_link *link)
11167
{
11168
int err;
11169
11170
if (!link->pin_path)
11171
return libbpf_err(-EINVAL);
11172
11173
err = unlink(link->pin_path);
11174
if (err != 0)
11175
return -errno;
11176
11177
pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
11178
zfree(&link->pin_path);
11179
return 0;
11180
}
11181
11182
struct bpf_link_perf {
11183
struct bpf_link link;
11184
int perf_event_fd;
11185
/* legacy kprobe support: keep track of probe identifier and type */
11186
char *legacy_probe_name;
11187
bool legacy_is_kprobe;
11188
bool legacy_is_retprobe;
11189
};
11190
11191
static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
11192
static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
11193
11194
static int bpf_link_perf_detach(struct bpf_link *link)
11195
{
11196
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11197
int err = 0;
11198
11199
if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
11200
err = -errno;
11201
11202
if (perf_link->perf_event_fd != link->fd)
11203
close(perf_link->perf_event_fd);
11204
close(link->fd);
11205
11206
/* legacy uprobe/kprobe needs to be removed after perf event fd closure */
11207
if (perf_link->legacy_probe_name) {
11208
if (perf_link->legacy_is_kprobe) {
11209
err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
11210
perf_link->legacy_is_retprobe);
11211
} else {
11212
err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
11213
perf_link->legacy_is_retprobe);
11214
}
11215
}
11216
11217
return err;
11218
}
11219
11220
static void bpf_link_perf_dealloc(struct bpf_link *link)
11221
{
11222
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11223
11224
free(perf_link->legacy_probe_name);
11225
free(perf_link);
11226
}
11227
11228
struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
11229
const struct bpf_perf_event_opts *opts)
11230
{
11231
struct bpf_link_perf *link;
11232
int prog_fd, link_fd = -1, err;
11233
bool force_ioctl_attach;
11234
11235
if (!OPTS_VALID(opts, bpf_perf_event_opts))
11236
return libbpf_err_ptr(-EINVAL);
11237
11238
if (pfd < 0) {
11239
pr_warn("prog '%s': invalid perf event FD %d\n",
11240
prog->name, pfd);
11241
return libbpf_err_ptr(-EINVAL);
11242
}
11243
prog_fd = bpf_program__fd(prog);
11244
if (prog_fd < 0) {
11245
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
11246
prog->name);
11247
return libbpf_err_ptr(-EINVAL);
11248
}
11249
11250
link = calloc(1, sizeof(*link));
11251
if (!link)
11252
return libbpf_err_ptr(-ENOMEM);
11253
link->link.detach = &bpf_link_perf_detach;
11254
link->link.dealloc = &bpf_link_perf_dealloc;
11255
link->perf_event_fd = pfd;
11256
11257
force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
11258
if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
11259
DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
11260
.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
11261
11262
link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
11263
if (link_fd < 0) {
11264
err = -errno;
11265
pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n",
11266
prog->name, pfd, errstr(err));
11267
goto err_out;
11268
}
11269
link->link.fd = link_fd;
11270
} else {
11271
if (OPTS_GET(opts, bpf_cookie, 0)) {
11272
pr_warn("prog '%s': user context value is not supported\n", prog->name);
11273
err = -EOPNOTSUPP;
11274
goto err_out;
11275
}
11276
11277
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
11278
err = -errno;
11279
pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
11280
prog->name, pfd, errstr(err));
11281
if (err == -EPROTO)
11282
pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
11283
prog->name, pfd);
11284
goto err_out;
11285
}
11286
link->link.fd = pfd;
11287
}
11288
11289
if (!OPTS_GET(opts, dont_enable, false)) {
11290
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11291
err = -errno;
11292
pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
11293
prog->name, pfd, errstr(err));
11294
goto err_out;
11295
}
11296
}
11297
11298
return &link->link;
11299
err_out:
11300
if (link_fd >= 0)
11301
close(link_fd);
11302
free(link);
11303
return libbpf_err_ptr(err);
11304
}
11305
11306
struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
11307
{
11308
return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
11309
}
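
/* Usage sketch (illustrative only; the sampling parameters are arbitrary
 * example values): opening a software CPU-clock perf event and attaching a
 * perf_event BPF program to it with a BPF cookie:
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.size = sizeof(attr),
 *		.config = PERF_COUNT_SW_CPU_CLOCK,
 *		.freq = 1,
 *		.sample_freq = 99,
 *	};
 *	int pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1,
 *			  PERF_FLAG_FD_CLOEXEC);
 *	LIBBPF_OPTS(bpf_perf_event_opts, pe_opts, .bpf_cookie = 0x1234);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
 */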
11310
11311
/*
 * This function is expected to parse an integer in the range of [0, 2^31-1]
 * from the given file using scanf format string fmt. If the actual parsed
 * value is negative, the result might be indistinguishable from an error.
 */
11316
static int parse_uint_from_file(const char *file, const char *fmt)
11317
{
11318
int err, ret;
11319
FILE *f;
11320
11321
f = fopen(file, "re");
11322
if (!f) {
11323
err = -errno;
11324
pr_debug("failed to open '%s': %s\n", file, errstr(err));
11325
return err;
11326
}
11327
err = fscanf(f, fmt, &ret);
11328
if (err != 1) {
11329
err = err == EOF ? -EIO : -errno;
11330
pr_debug("failed to parse '%s': %s\n", file, errstr(err));
11331
fclose(f);
11332
return err;
11333
}
11334
fclose(f);
11335
return ret;
11336
}
11337
11338
static int determine_kprobe_perf_type(void)
11339
{
11340
const char *file = "/sys/bus/event_source/devices/kprobe/type";
11341
11342
return parse_uint_from_file(file, "%d\n");
11343
}
11344
11345
static int determine_uprobe_perf_type(void)
11346
{
11347
const char *file = "/sys/bus/event_source/devices/uprobe/type";
11348
11349
return parse_uint_from_file(file, "%d\n");
11350
}
11351
11352
static int determine_kprobe_retprobe_bit(void)
11353
{
11354
const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
11355
11356
return parse_uint_from_file(file, "config:%d\n");
11357
}
11358
11359
static int determine_uprobe_retprobe_bit(void)
11360
{
11361
const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
11362
11363
return parse_uint_from_file(file, "config:%d\n");
11364
}
11365
11366
#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
11367
#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
11368
11369
static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
11370
uint64_t offset, int pid, size_t ref_ctr_off)
11371
{
11372
const size_t attr_sz = sizeof(struct perf_event_attr);
11373
struct perf_event_attr attr;
11374
int type, pfd;
11375
11376
if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
11377
return -EINVAL;
11378
11379
memset(&attr, 0, attr_sz);
11380
11381
type = uprobe ? determine_uprobe_perf_type()
11382
: determine_kprobe_perf_type();
11383
if (type < 0) {
11384
pr_warn("failed to determine %s perf type: %s\n",
11385
uprobe ? "uprobe" : "kprobe",
11386
errstr(type));
11387
return type;
11388
}
11389
if (retprobe) {
11390
int bit = uprobe ? determine_uprobe_retprobe_bit()
11391
: determine_kprobe_retprobe_bit();
11392
11393
if (bit < 0) {
11394
pr_warn("failed to determine %s retprobe bit: %s\n",
11395
uprobe ? "uprobe" : "kprobe",
11396
errstr(bit));
11397
return bit;
11398
}
11399
attr.config |= 1 << bit;
11400
}
11401
attr.size = attr_sz;
11402
attr.type = type;
11403
attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
11404
attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
11405
attr.config2 = offset; /* kprobe_addr or probe_offset */
11406
11407
/* pid filter is meaningful only for uprobes */
11408
pfd = syscall(__NR_perf_event_open, &attr,
11409
pid < 0 ? -1 : pid /* pid */,
11410
pid == -1 ? 0 : -1 /* cpu */,
11411
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11412
return pfd >= 0 ? pfd : -errno;
11413
}
11414
11415
static int append_to_file(const char *file, const char *fmt, ...)
11416
{
11417
int fd, n, err = 0;
11418
va_list ap;
11419
char buf[1024];
11420
11421
va_start(ap, fmt);
11422
n = vsnprintf(buf, sizeof(buf), fmt, ap);
11423
va_end(ap);
11424
11425
if (n < 0 || n >= sizeof(buf))
11426
return -EINVAL;
11427
11428
fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
11429
if (fd < 0)
11430
return -errno;
11431
11432
if (write(fd, buf, n) < 0)
11433
err = -errno;
11434
11435
close(fd);
11436
return err;
11437
}
11438
11439
#define DEBUGFS "/sys/kernel/debug/tracing"
11440
#define TRACEFS "/sys/kernel/tracing"
11441
11442
static bool use_debugfs(void)
11443
{
11444
static int has_debugfs = -1;
11445
11446
if (has_debugfs < 0)
11447
has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
11448
11449
return has_debugfs == 1;
11450
}
11451
11452
static const char *tracefs_path(void)
11453
{
11454
return use_debugfs() ? DEBUGFS : TRACEFS;
11455
}
11456
11457
static const char *tracefs_kprobe_events(void)
11458
{
11459
return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
11460
}
11461
11462
static const char *tracefs_uprobe_events(void)
11463
{
11464
return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
11465
}
11466
11467
static const char *tracefs_available_filter_functions(void)
11468
{
11469
return use_debugfs() ? DEBUGFS"/available_filter_functions"
11470
: TRACEFS"/available_filter_functions";
11471
}
11472
11473
static const char *tracefs_available_filter_functions_addrs(void)
11474
{
11475
return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
11476
: TRACEFS"/available_filter_functions_addrs";
11477
}
11478
11479
static void gen_probe_legacy_event_name(char *buf, size_t buf_sz,
11480
const char *name, size_t offset)
11481
{
11482
static int index = 0;
11483
int i;
11484
11485
snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(),
11486
__sync_fetch_and_add(&index, 1), name, offset);
11487
11488
/* replace non-alphanumeric characters in the probe name with '_' */
11489
for (i = 0; buf[i]; i++) {
11490
if (!isalnum(buf[i]))
11491
buf[i] = '_';
11492
}
11493
}
11494
11495
static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
11496
const char *kfunc_name, size_t offset)
11497
{
11498
return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
11499
retprobe ? 'r' : 'p',
11500
retprobe ? "kretprobes" : "kprobes",
11501
probe_name, kfunc_name, offset);
11502
}
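
/* For example (the probe and function names are hypothetical), a call like
 * add_kprobe_event_legacy("libbpf_1234_0_do_unlinkat_0x0", false,
 * "do_unlinkat", 0) appends this line to <tracefs>/kprobe_events:
 *
 *	p:kprobes/libbpf_1234_0_do_unlinkat_0x0 do_unlinkat+0x0
 *
 * and remove_kprobe_event_legacy() later deletes it with the matching
 * "-:kprobes/<probe_name>" syntax.
 */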
11503
11504
static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
11505
{
11506
return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
11507
retprobe ? "kretprobes" : "kprobes", probe_name);
11508
}
11509
11510
static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11511
{
11512
char file[256];
11513
11514
snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11515
tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
11516
11517
return parse_uint_from_file(file, "%d\n");
11518
}
11519
11520
static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
11521
const char *kfunc_name, size_t offset, int pid)
11522
{
11523
const size_t attr_sz = sizeof(struct perf_event_attr);
11524
struct perf_event_attr attr;
11525
int type, pfd, err;
11526
11527
err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
11528
if (err < 0) {
11529
pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
11530
kfunc_name, offset,
11531
errstr(err));
11532
return err;
11533
}
11534
type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
11535
if (type < 0) {
11536
err = type;
11537
pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
11538
kfunc_name, offset,
11539
errstr(err));
11540
goto err_clean_legacy;
11541
}
11542
11543
memset(&attr, 0, attr_sz);
11544
attr.size = attr_sz;
11545
attr.config = type;
11546
attr.type = PERF_TYPE_TRACEPOINT;
11547
11548
pfd = syscall(__NR_perf_event_open, &attr,
11549
pid < 0 ? -1 : pid, /* pid */
11550
pid == -1 ? 0 : -1, /* cpu */
11551
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11552
if (pfd < 0) {
11553
err = -errno;
11554
pr_warn("legacy kprobe perf_event_open() failed: %s\n",
11555
errstr(err));
11556
goto err_clean_legacy;
11557
}
11558
return pfd;
11559
11560
err_clean_legacy:
11561
/* Clear the newly added legacy kprobe_event */
11562
remove_kprobe_event_legacy(probe_name, retprobe);
11563
return err;
11564
}
11565
11566
static const char *arch_specific_syscall_pfx(void)
11567
{
11568
#if defined(__x86_64__)
11569
return "x64";
11570
#elif defined(__i386__)
11571
return "ia32";
11572
#elif defined(__s390x__)
11573
return "s390x";
11574
#elif defined(__arm__)
11575
return "arm";
11576
#elif defined(__aarch64__)
11577
return "arm64";
11578
#elif defined(__mips__)
11579
return "mips";
11580
#elif defined(__riscv)
11581
return "riscv";
11582
#elif defined(__powerpc__)
11583
return "powerpc";
11584
#elif defined(__powerpc64__)
11585
return "powerpc64";
11586
#else
11587
return NULL;
11588
#endif
11589
}
11590
11591
int probe_kern_syscall_wrapper(int token_fd)
11592
{
11593
char syscall_name[64];
11594
const char *ksys_pfx;
11595
11596
ksys_pfx = arch_specific_syscall_pfx();
11597
if (!ksys_pfx)
11598
return 0;
11599
11600
snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
11601
11602
if (determine_kprobe_perf_type() >= 0) {
11603
int pfd;
11604
11605
pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
11606
if (pfd >= 0)
11607
close(pfd);
11608
11609
return pfd >= 0 ? 1 : 0;
11610
} else { /* legacy mode */
11611
char probe_name[MAX_EVENT_NAME_LEN];
11612
11613
gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
11614
if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
11615
return 0;
11616
11617
(void)remove_kprobe_event_legacy(probe_name, false);
11618
return 1;
11619
}
11620
}
11621
11622
struct bpf_link *
11623
bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
11624
const char *func_name,
11625
const struct bpf_kprobe_opts *opts)
11626
{
11627
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11628
enum probe_attach_mode attach_mode;
11629
char *legacy_probe = NULL;
11630
struct bpf_link *link;
11631
size_t offset;
11632
bool retprobe, legacy;
11633
int pfd, err;
11634
11635
if (!OPTS_VALID(opts, bpf_kprobe_opts))
11636
return libbpf_err_ptr(-EINVAL);
11637
11638
attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11639
retprobe = OPTS_GET(opts, retprobe, false);
11640
offset = OPTS_GET(opts, offset, 0);
11641
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11642
11643
legacy = determine_kprobe_perf_type() < 0;
11644
switch (attach_mode) {
11645
case PROBE_ATTACH_MODE_LEGACY:
11646
legacy = true;
11647
pe_opts.force_ioctl_attach = true;
11648
break;
11649
case PROBE_ATTACH_MODE_PERF:
11650
if (legacy)
11651
return libbpf_err_ptr(-ENOTSUP);
11652
pe_opts.force_ioctl_attach = true;
11653
break;
11654
case PROBE_ATTACH_MODE_LINK:
11655
if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11656
return libbpf_err_ptr(-ENOTSUP);
11657
break;
11658
case PROBE_ATTACH_MODE_DEFAULT:
11659
break;
11660
default:
11661
return libbpf_err_ptr(-EINVAL);
11662
}
11663
11664
if (!legacy) {
11665
pfd = perf_event_open_probe(false /* uprobe */, retprobe,
11666
func_name, offset,
11667
-1 /* pid */, 0 /* ref_ctr_off */);
11668
} else {
11669
char probe_name[MAX_EVENT_NAME_LEN];
11670
11671
gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
11672
func_name, offset);
11673
11674
legacy_probe = strdup(probe_name);
11675
if (!legacy_probe)
11676
return libbpf_err_ptr(-ENOMEM);
11677
11678
pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
11679
offset, -1 /* pid */);
11680
}
11681
if (pfd < 0) {
11682
err = -errno;
11683
pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
11684
prog->name, retprobe ? "kretprobe" : "kprobe",
11685
func_name, offset,
11686
errstr(err));
11687
goto err_out;
11688
}
11689
link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11690
err = libbpf_get_error(link);
11691
if (err) {
11692
close(pfd);
11693
pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
11694
prog->name, retprobe ? "kretprobe" : "kprobe",
11695
func_name, offset,
11696
errstr(err));
11697
goto err_clean_legacy;
11698
}
11699
if (legacy) {
11700
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11701
11702
perf_link->legacy_probe_name = legacy_probe;
11703
perf_link->legacy_is_kprobe = true;
11704
perf_link->legacy_is_retprobe = retprobe;
11705
}
11706
11707
return link;
11708
11709
err_clean_legacy:
11710
if (legacy)
11711
remove_kprobe_event_legacy(legacy_probe, retprobe);
11712
err_out:
11713
free(legacy_probe);
11714
return libbpf_err_ptr(err);
11715
}
11716
11717
struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
11718
bool retprobe,
11719
const char *func_name)
11720
{
11721
DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
11722
.retprobe = retprobe,
11723
);
11724
11725
return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
11726
}
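
/* Usage sketch (illustrative only; the traced kernel function is just an
 * example): attaching a kretprobe with a BPF cookie through the _opts
 * variant:
 *
 *	LIBBPF_OPTS(bpf_kprobe_opts, opts,
 *		.retprobe = true,
 *		.bpf_cookie = 0xcafe,
 *	);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_opts(prog, "do_unlinkat", &opts);
 *	if (!link)
 *		return -errno;
 */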
11727
11728
struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
11729
const char *syscall_name,
11730
const struct bpf_ksyscall_opts *opts)
11731
{
11732
LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
11733
char func_name[128];
11734
11735
if (!OPTS_VALID(opts, bpf_ksyscall_opts))
11736
return libbpf_err_ptr(-EINVAL);
11737
11738
if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
11739
/* arch_specific_syscall_pfx() should never return NULL here
 * because it is guarded by kernel_supports(). However, since the
 * compiler does not know that, we have an explicit conditional as
 * well.
 */
11744
snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
11745
arch_specific_syscall_pfx() ? : "", syscall_name);
11746
} else {
11747
snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
11748
}
11749
11750
kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
11751
kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11752
11753
return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
11754
}
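
/* Usage sketch (illustrative only): attaching to the entry of the unlinkat
 * syscall; the syscall name is passed without any arch-specific
 * __<arch>_sys_ prefix, which this function adds as needed:
 *
 *	LIBBPF_OPTS(bpf_ksyscall_opts, opts, .bpf_cookie = 1);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_ksyscall(prog, "unlinkat", &opts);
 */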
11755
11756
/* Adapted from perf/util/string.c */
11757
bool glob_match(const char *str, const char *pat)
11758
{
11759
while (*str && *pat && *pat != '*') {
11760
if (*pat == '?') { /* Matches any single character */
11761
str++;
11762
pat++;
11763
continue;
11764
}
11765
if (*str != *pat)
11766
return false;
11767
str++;
11768
pat++;
11769
}
11770
/* Check wild card */
11771
if (*pat == '*') {
11772
while (*pat == '*')
11773
pat++;
11774
if (!*pat) /* Tail wild card matches all */
11775
return true;
11776
while (*str)
11777
if (glob_match(str++, pat))
11778
return true;
11779
}
11780
return !*str && !*pat;
11781
}
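
/* A few illustrative cases of the glob semantics implemented above:
 *
 *	glob_match("tcp_v4_connect", "tcp_*")    -> true
 *	glob_match("tcp_v4_connect", "tcp_v?_*") -> true
 *	glob_match("udp_sendmsg",    "tcp_*")    -> false
 *	glob_match("tcp",            "tcp*")     -> true  (tail '*' matches empty)
 */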
11782
11783
struct kprobe_multi_resolve {
11784
const char *pattern;
11785
unsigned long *addrs;
11786
size_t cap;
11787
size_t cnt;
11788
};
11789
11790
struct avail_kallsyms_data {
11791
char **syms;
11792
size_t cnt;
11793
struct kprobe_multi_resolve *res;
11794
};
11795
11796
static int avail_func_cmp(const void *a, const void *b)
11797
{
11798
return strcmp(*(const char **)a, *(const char **)b);
11799
}
11800
11801
static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
11802
const char *sym_name, void *ctx)
11803
{
11804
struct avail_kallsyms_data *data = ctx;
11805
struct kprobe_multi_resolve *res = data->res;
11806
int err;
11807
11808
if (!glob_match(sym_name, res->pattern))
11809
return 0;
11810
11811
if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) {
11812
/* Some kernel versions strip the .llvm.<hash> suffix from function
 * names reported in available_filter_functions, but don't do so for
 * kallsyms. While this is clearly a kernel bug (fixed by [0]), we try
 * to accommodate it in libbpf to make multi-kprobe usability a bit
 * better: if no match is found, we strip the .llvm. suffix and try
 * one more time.
 *
 * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG")
 */
11821
char sym_trim[256], *psym_trim = sym_trim;
11822
const char *sym_sfx;
11823
11824
if (!(sym_sfx = strstr(sym_name, ".llvm.")))
11825
return 0;
11826
11827
/* The psym_trim vs sym_trim dance is done to avoid pointer-vs-array
 * coercion differences and to get the proper `const char **` pointer
 * that avail_func_cmp() expects.
 */
11831
snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name);
11832
if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
11833
return 0;
11834
}
11835
11836
err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
11837
if (err)
11838
return err;
11839
11840
res->addrs[res->cnt++] = (unsigned long)sym_addr;
11841
return 0;
11842
}
11843
11844
static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
11845
{
11846
const char *available_functions_file = tracefs_available_filter_functions();
11847
struct avail_kallsyms_data data;
11848
char sym_name[500];
11849
FILE *f;
11850
int err = 0, ret, i;
11851
char **syms = NULL;
11852
size_t cap = 0, cnt = 0;
11853
11854
f = fopen(available_functions_file, "re");
11855
if (!f) {
11856
err = -errno;
11857
pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err));
11858
return err;
11859
}
11860
11861
while (true) {
11862
char *name;
11863
11864
ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
11865
if (ret == EOF && feof(f))
11866
break;
11867
11868
if (ret != 1) {
11869
pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
11870
err = -EINVAL;
11871
goto cleanup;
11872
}
11873
11874
if (!glob_match(sym_name, res->pattern))
11875
continue;
11876
11877
err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
11878
if (err)
11879
goto cleanup;
11880
11881
name = strdup(sym_name);
11882
if (!name) {
11883
err = -errno;
11884
goto cleanup;
11885
}
11886
11887
syms[cnt++] = name;
11888
}
11889
11890
/* no entries found, bail out */
11891
if (cnt == 0) {
11892
err = -ENOENT;
11893
goto cleanup;
11894
}
11895
11896
/* sort available functions */
11897
qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
11898
11899
data.syms = syms;
11900
data.res = res;
11901
data.cnt = cnt;
11902
libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
11903
11904
if (res->cnt == 0)
11905
err = -ENOENT;
11906
11907
cleanup:
11908
for (i = 0; i < cnt; i++)
11909
free((char *)syms[i]);
11910
free(syms);
11911
11912
fclose(f);
11913
return err;
11914
}
11915
11916
static bool has_available_filter_functions_addrs(void)
11917
{
11918
return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
11919
}
11920
11921
static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
11922
{
11923
const char *available_path = tracefs_available_filter_functions_addrs();
11924
char sym_name[500];
11925
FILE *f;
11926
int ret, err = 0;
11927
unsigned long long sym_addr;
11928
11929
f = fopen(available_path, "re");
11930
if (!f) {
11931
err = -errno;
11932
pr_warn("failed to open %s: %s\n", available_path, errstr(err));
11933
return err;
11934
}
11935
11936
while (true) {
11937
ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
11938
if (ret == EOF && feof(f))
11939
break;
11940
11941
if (ret != 2) {
11942
pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
11943
ret);
11944
err = -EINVAL;
11945
goto cleanup;
11946
}
11947
11948
if (!glob_match(sym_name, res->pattern))
11949
continue;
11950
11951
err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
11952
sizeof(*res->addrs), res->cnt + 1);
11953
if (err)
11954
goto cleanup;
11955
11956
res->addrs[res->cnt++] = (unsigned long)sym_addr;
11957
}
11958
11959
if (res->cnt == 0)
11960
err = -ENOENT;
11961
11962
cleanup:
11963
fclose(f);
11964
return err;
11965
}
11966
11967
struct bpf_link *
11968
bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
11969
const char *pattern,
11970
const struct bpf_kprobe_multi_opts *opts)
11971
{
11972
LIBBPF_OPTS(bpf_link_create_opts, lopts);
11973
struct kprobe_multi_resolve res = {
11974
.pattern = pattern,
11975
};
11976
enum bpf_attach_type attach_type;
11977
struct bpf_link *link = NULL;
11978
const unsigned long *addrs;
11979
int err, link_fd, prog_fd;
11980
bool retprobe, session, unique_match;
11981
const __u64 *cookies;
11982
const char **syms;
11983
size_t cnt;
11984
11985
if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
11986
return libbpf_err_ptr(-EINVAL);
11987
11988
prog_fd = bpf_program__fd(prog);
11989
if (prog_fd < 0) {
11990
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
11991
prog->name);
11992
return libbpf_err_ptr(-EINVAL);
11993
}
11994
11995
syms = OPTS_GET(opts, syms, false);
11996
addrs = OPTS_GET(opts, addrs, false);
11997
cnt = OPTS_GET(opts, cnt, false);
11998
cookies = OPTS_GET(opts, cookies, false);
11999
unique_match = OPTS_GET(opts, unique_match, false);
12000
12001
if (!pattern && !addrs && !syms)
12002
return libbpf_err_ptr(-EINVAL);
12003
if (pattern && (addrs || syms || cookies || cnt))
12004
return libbpf_err_ptr(-EINVAL);
12005
if (!pattern && !cnt)
12006
return libbpf_err_ptr(-EINVAL);
12007
if (!pattern && unique_match)
12008
return libbpf_err_ptr(-EINVAL);
12009
if (addrs && syms)
12010
return libbpf_err_ptr(-EINVAL);
12011
12012
if (pattern) {
12013
if (has_available_filter_functions_addrs())
12014
err = libbpf_available_kprobes_parse(&res);
12015
else
12016
err = libbpf_available_kallsyms_parse(&res);
12017
if (err)
12018
goto error;
12019
12020
if (unique_match && res.cnt != 1) {
12021
pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n",
12022
prog->name, pattern, res.cnt);
12023
err = -EINVAL;
12024
goto error;
12025
}
12026
12027
addrs = res.addrs;
12028
cnt = res.cnt;
12029
}
12030
12031
retprobe = OPTS_GET(opts, retprobe, false);
12032
session = OPTS_GET(opts, session, false);
12033
12034
if (retprobe && session)
12035
return libbpf_err_ptr(-EINVAL);
12036
12037
attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI;
12038
12039
lopts.kprobe_multi.syms = syms;
12040
lopts.kprobe_multi.addrs = addrs;
12041
lopts.kprobe_multi.cookies = cookies;
12042
lopts.kprobe_multi.cnt = cnt;
12043
lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
12044
12045
link = calloc(1, sizeof(*link));
12046
if (!link) {
12047
err = -ENOMEM;
12048
goto error;
12049
}
12050
link->detach = &bpf_link__detach_fd;
12051
12052
link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
12053
if (link_fd < 0) {
12054
err = -errno;
12055
pr_warn("prog '%s': failed to attach: %s\n",
12056
prog->name, errstr(err));
12057
goto error;
12058
}
12059
link->fd = link_fd;
12060
free(res.addrs);
12061
return link;
12062
12063
error:
12064
free(link);
12065
free(res.addrs);
12066
return libbpf_err_ptr(err);
12067
}
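
/* Usage sketch (illustrative only; the glob pattern is an example): assuming
 * prog was declared with SEC("kprobe.multi"), attach one link covering every
 * kernel function that matches the pattern:
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .retprobe = false);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", &opts);
 *	if (!link)
 *		return -errno;
 */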
12068
12069
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12070
{
12071
DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
12072
unsigned long offset = 0;
12073
const char *func_name;
12074
char *func;
12075
int n;
12076
12077
*link = NULL;
12078
12079
/* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
12080
if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
12081
return 0;
12082
12083
opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
12084
if (opts.retprobe)
12085
func_name = prog->sec_name + sizeof("kretprobe/") - 1;
12086
else
12087
func_name = prog->sec_name + sizeof("kprobe/") - 1;
12088
12089
n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
12090
if (n < 1) {
12091
pr_warn("kprobe name is invalid: %s\n", func_name);
12092
return -EINVAL;
12093
}
12094
if (opts.retprobe && offset != 0) {
12095
free(func);
12096
pr_warn("kretprobes do not support offset specification\n");
12097
return -EINVAL;
12098
}
12099
12100
opts.offset = offset;
12101
*link = bpf_program__attach_kprobe_opts(prog, func, &opts);
12102
free(func);
12103
return libbpf_get_error(*link);
12104
}
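
/* Examples of section names handled by the auto-attach logic above (the
 * function name and offset are illustrative):
 *
 *	SEC("kprobe/do_unlinkat")	attach at function entry
 *	SEC("kprobe/do_unlinkat+0x10")	attach at entry plus 0x10 offset
 *	SEC("kretprobe/do_unlinkat")	attach at function return
 *	SEC("kprobe")			valid, but no auto-attach
 */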
12105
12106
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12107
{
12108
LIBBPF_OPTS(bpf_ksyscall_opts, opts);
12109
const char *syscall_name;
12110
12111
*link = NULL;
12112
12113
/* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
12114
if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
12115
return 0;
12116
12117
opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
12118
if (opts.retprobe)
12119
syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
12120
else
12121
syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
12122
12123
*link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
12124
return *link ? 0 : -errno;
12125
}
12126
12127
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12128
{
12129
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
12130
const char *spec;
12131
char *pattern;
12132
int n;
12133
12134
*link = NULL;
12135
12136
/* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
12137
if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
12138
strcmp(prog->sec_name, "kretprobe.multi") == 0)
12139
return 0;
12140
12141
opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
12142
if (opts.retprobe)
12143
spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
12144
else
12145
spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
12146
12147
n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
12148
if (n < 1) {
12149
pr_warn("kprobe multi pattern is invalid: %s\n", spec);
12150
return -EINVAL;
12151
}
12152
12153
*link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
12154
free(pattern);
12155
return libbpf_get_error(*link);
12156
}
12157
12158
static int attach_kprobe_session(const struct bpf_program *prog, long cookie,
12159
struct bpf_link **link)
12160
{
12161
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true);
12162
const char *spec;
12163
char *pattern;
12164
int n;
12165
12166
*link = NULL;
12167
12168
/* no auto-attach for SEC("kprobe.session") */
12169
if (strcmp(prog->sec_name, "kprobe.session") == 0)
12170
return 0;
12171
12172
spec = prog->sec_name + sizeof("kprobe.session/") - 1;
12173
n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
12174
if (n < 1) {
12175
pr_warn("kprobe session pattern is invalid: %s\n", spec);
12176
return -EINVAL;
12177
}
12178
12179
*link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
12180
free(pattern);
12181
return *link ? 0 : -errno;
12182
}
12183
12184
static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12185
{
12186
char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
12187
LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
12188
int n, ret = -EINVAL;
12189
12190
*link = NULL;
12191
12192
n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
12193
&probe_type, &binary_path, &func_name);
12194
switch (n) {
12195
case 1:
12196
/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
12197
ret = 0;
12198
break;
12199
case 3:
12200
opts.session = str_has_pfx(probe_type, "uprobe.session");
12201
opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi");
12202
12203
*link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
12204
ret = libbpf_get_error(*link);
12205
break;
12206
default:
12207
pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
12208
prog->sec_name);
12209
break;
12210
}
12211
free(probe_type);
12212
free(binary_path);
12213
free(func_name);
12214
return ret;
12215
}
12216
12217
static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
12218
const char *binary_path, size_t offset)
12219
{
12220
return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
12221
retprobe ? 'r' : 'p',
12222
retprobe ? "uretprobes" : "uprobes",
12223
probe_name, binary_path, offset);
12224
}
12225
12226
static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
12227
{
12228
return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
12229
retprobe ? "uretprobes" : "uprobes", probe_name);
12230
}
12231
12232
static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
12233
{
12234
char file[512];
12235
12236
snprintf(file, sizeof(file), "%s/events/%s/%s/id",
12237
tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
12238
12239
return parse_uint_from_file(file, "%d\n");
12240
}
12241
12242
static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
12243
const char *binary_path, size_t offset, int pid)
12244
{
12245
const size_t attr_sz = sizeof(struct perf_event_attr);
12246
struct perf_event_attr attr;
12247
int type, pfd, err;
12248
12249
err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
12250
if (err < 0) {
12251
pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n",
12252
binary_path, (size_t)offset, errstr(err));
12253
return err;
12254
}
12255
type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
12256
if (type < 0) {
12257
err = type;
12258
pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n",
12259
binary_path, offset, errstr(err));
12260
goto err_clean_legacy;
12261
}
12262
12263
memset(&attr, 0, attr_sz);
12264
attr.size = attr_sz;
12265
attr.config = type;
12266
attr.type = PERF_TYPE_TRACEPOINT;
12267
12268
pfd = syscall(__NR_perf_event_open, &attr,
12269
pid < 0 ? -1 : pid, /* pid */
12270
pid == -1 ? 0 : -1, /* cpu */
12271
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12272
if (pfd < 0) {
12273
err = -errno;
12274
pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err));
12275
goto err_clean_legacy;
12276
}
12277
return pfd;
12278
12279
err_clean_legacy:
12280
/* Clear the newly added legacy uprobe_event */
12281
remove_uprobe_event_legacy(probe_name, retprobe);
12282
return err;
12283
}
12284
12285
/* Find the offset of a function name in the archive specified by path.
 * Currently supported are .zip files that do not compress their contents,
 * as used on Android in the form of APKs, for example. "file_name" is the
 * name of the ELF file inside the archive. "func_name" matches a symbol
 * name or name@@LIB for library functions.
 *
 * An overview of the APK format specifically is provided here:
 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
 */
12294
static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
12295
const char *func_name)
12296
{
12297
struct zip_archive *archive;
12298
struct zip_entry entry;
12299
long ret;
12300
Elf *elf;
12301
12302
archive = zip_archive_open(archive_path);
12303
if (IS_ERR(archive)) {
12304
ret = PTR_ERR(archive);
12305
pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
12306
return ret;
12307
}
12308
12309
ret = zip_archive_find_entry(archive, file_name, &entry);
12310
if (ret) {
12311
pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
12312
archive_path, ret);
12313
goto out;
12314
}
12315
pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
12316
(unsigned long)entry.data_offset);
12317
12318
if (entry.compression) {
12319
pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
12320
archive_path);
12321
ret = -LIBBPF_ERRNO__FORMAT;
12322
goto out;
12323
}
12324
12325
elf = elf_memory((void *)entry.data, entry.data_length);
12326
if (!elf) {
12327
pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
12328
elf_errmsg(-1));
12329
ret = -LIBBPF_ERRNO__LIBELF;
12330
goto out;
12331
}
12332
12333
ret = elf_find_func_offset(elf, file_name, func_name);
12334
if (ret > 0) {
12335
pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
12336
func_name, file_name, archive_path, entry.data_offset, ret,
12337
ret + entry.data_offset);
12338
ret += entry.data_offset;
12339
}
12340
elf_end(elf);
12341
12342
out:
12343
zip_archive_close(archive);
12344
return ret;
12345
}
12346
12347
static const char *arch_specific_lib_paths(void)
12348
{
12349
/*
12350
* Based on https://packages.debian.org/sid/libc6.
12351
*
12352
* Assume that the traced program is built for the same architecture
12353
* as libbpf, which should cover the vast majority of cases.
12354
*/
12355
#if defined(__x86_64__)
12356
return "/lib/x86_64-linux-gnu";
12357
#elif defined(__i386__)
12358
return "/lib/i386-linux-gnu";
12359
#elif defined(__s390x__)
12360
return "/lib/s390x-linux-gnu";
12361
#elif defined(__arm__) && defined(__SOFTFP__)
12362
return "/lib/arm-linux-gnueabi";
12363
#elif defined(__arm__) && !defined(__SOFTFP__)
12364
return "/lib/arm-linux-gnueabihf";
12365
#elif defined(__aarch64__)
12366
return "/lib/aarch64-linux-gnu";
12367
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
12368
return "/lib/mips64el-linux-gnuabi64";
12369
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
12370
return "/lib/mipsel-linux-gnu";
12371
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
12372
return "/lib/powerpc64le-linux-gnu";
12373
#elif defined(__sparc__) && defined(__arch64__)
12374
return "/lib/sparc64-linux-gnu";
12375
#elif defined(__riscv) && __riscv_xlen == 64
12376
return "/lib/riscv64-linux-gnu";
12377
#else
12378
return NULL;
12379
#endif
12380
}
12381
12382
/* Get full path to program/shared library. */
12383
static int resolve_full_path(const char *file, char *result, size_t result_sz)
12384
{
12385
const char *search_paths[3] = {};
12386
int i, perm;
12387
12388
if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
12389
search_paths[0] = getenv("LD_LIBRARY_PATH");
12390
search_paths[1] = "/usr/lib64:/usr/lib";
12391
search_paths[2] = arch_specific_lib_paths();
12392
perm = R_OK;
12393
} else {
12394
search_paths[0] = getenv("PATH");
12395
search_paths[1] = "/usr/bin:/usr/sbin";
12396
perm = R_OK | X_OK;
12397
}
12398
12399
for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
12400
const char *s;
12401
12402
if (!search_paths[i])
12403
continue;
12404
for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
12405
const char *next_path;
12406
int seg_len;
12407
12408
if (s[0] == ':')
12409
s++;
12410
next_path = strchr(s, ':');
12411
seg_len = next_path ? next_path - s : strlen(s);
12412
if (!seg_len)
12413
continue;
12414
snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
12415
/* ensure it has required permissions */
12416
if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
12417
continue;
12418
pr_debug("resolved '%s' to '%s'\n", file, result);
12419
return 0;
12420
}
12421
}
12422
return -ENOENT;
12423
}
12424
12425
struct bpf_link *
12426
bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
12427
pid_t pid,
12428
const char *path,
12429
const char *func_pattern,
12430
const struct bpf_uprobe_multi_opts *opts)
12431
{
12432
const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
12433
LIBBPF_OPTS(bpf_link_create_opts, lopts);
12434
unsigned long *resolved_offsets = NULL;
12435
enum bpf_attach_type attach_type;
12436
int err = 0, link_fd, prog_fd;
12437
struct bpf_link *link = NULL;
12438
char full_path[PATH_MAX];
12439
bool retprobe, session;
12440
const __u64 *cookies;
12441
const char **syms;
12442
size_t cnt;
12443
12444
if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
12445
return libbpf_err_ptr(-EINVAL);
12446
12447
prog_fd = bpf_program__fd(prog);
12448
if (prog_fd < 0) {
12449
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12450
prog->name);
12451
return libbpf_err_ptr(-EINVAL);
12452
}
12453
12454
syms = OPTS_GET(opts, syms, NULL);
12455
offsets = OPTS_GET(opts, offsets, NULL);
12456
ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
12457
cookies = OPTS_GET(opts, cookies, NULL);
12458
cnt = OPTS_GET(opts, cnt, 0);
12459
retprobe = OPTS_GET(opts, retprobe, false);
12460
session = OPTS_GET(opts, session, false);
12461
12462
/*
 * The user can specify two mutually exclusive sets of inputs:
 *
 * 1) use only path/func_pattern/pid arguments
 *
 * 2) use path/pid with allowed combinations of
 *    syms/offsets/ref_ctr_offsets/cookies/cnt
 *
 *    - syms and offsets are mutually exclusive
 *    - ref_ctr_offsets and cookies are optional
 *
 * Any other usage results in an error.
 */
12475
12476
if (!path)
12477
return libbpf_err_ptr(-EINVAL);
12478
if (!func_pattern && cnt == 0)
12479
return libbpf_err_ptr(-EINVAL);
12480
12481
if (func_pattern) {
12482
if (syms || offsets || ref_ctr_offsets || cookies || cnt)
12483
return libbpf_err_ptr(-EINVAL);
12484
} else {
12485
if (!!syms == !!offsets)
12486
return libbpf_err_ptr(-EINVAL);
12487
}
12488
12489
if (retprobe && session)
12490
return libbpf_err_ptr(-EINVAL);
12491
12492
if (func_pattern) {
12493
if (!strchr(path, '/')) {
12494
err = resolve_full_path(path, full_path, sizeof(full_path));
12495
if (err) {
12496
pr_warn("prog '%s': failed to resolve full path for '%s': %s\n",
12497
prog->name, path, errstr(err));
12498
return libbpf_err_ptr(err);
12499
}
12500
path = full_path;
12501
}
12502
12503
err = elf_resolve_pattern_offsets(path, func_pattern,
12504
&resolved_offsets, &cnt);
12505
if (err < 0)
12506
return libbpf_err_ptr(err);
12507
offsets = resolved_offsets;
12508
} else if (syms) {
12509
err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
12510
if (err < 0)
12511
return libbpf_err_ptr(err);
12512
offsets = resolved_offsets;
12513
}
12514
12515
attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI;
12516
12517
lopts.uprobe_multi.path = path;
12518
lopts.uprobe_multi.offsets = offsets;
12519
lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
12520
lopts.uprobe_multi.cookies = cookies;
12521
lopts.uprobe_multi.cnt = cnt;
12522
lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0;
12523
12524
if (pid == 0)
12525
pid = getpid();
12526
if (pid > 0)
12527
lopts.uprobe_multi.pid = pid;
12528
12529
link = calloc(1, sizeof(*link));
12530
if (!link) {
12531
err = -ENOMEM;
12532
goto error;
12533
}
12534
link->detach = &bpf_link__detach_fd;
12535
12536
link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
12537
if (link_fd < 0) {
12538
err = -errno;
12539
pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
12540
prog->name, errstr(err));
12541
goto error;
12542
}
12543
link->fd = link_fd;
12544
free(resolved_offsets);
12545
return link;
12546
12547
error:
12548
free(resolved_offsets);
12549
free(link);
12550
return libbpf_err_ptr(err);
12551
}
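
/* Usage sketch (illustrative only; the library path and symbol names are
 * examples): assuming prog was declared with SEC("uprobe.multi"), attach one
 * link to several libc functions with one cookie per symbol:
 *
 *	const char *syms[] = { "malloc", "free" };
 *	__u64 cookies[] = { 1, 2 };
 *	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
 *		.syms = syms,
 *		.cookies = cookies,
 *		.cnt = 2,
 *	);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe_multi(prog, -1,
 *			"/usr/lib/x86_64-linux-gnu/libc.so.6", NULL, &opts);
 */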
12552
12553
LIBBPF_API struct bpf_link *
12554
bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
12555
const char *binary_path, size_t func_offset,
12556
const struct bpf_uprobe_opts *opts)
12557
{
12558
const char *archive_path = NULL, *archive_sep = NULL;
12559
char *legacy_probe = NULL;
12560
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12561
enum probe_attach_mode attach_mode;
12562
char full_path[PATH_MAX];
12563
struct bpf_link *link;
12564
size_t ref_ctr_off;
12565
int pfd, err;
12566
bool retprobe, legacy;
12567
const char *func_name;
12568
12569
if (!OPTS_VALID(opts, bpf_uprobe_opts))
12570
return libbpf_err_ptr(-EINVAL);
12571
12572
attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
12573
retprobe = OPTS_GET(opts, retprobe, false);
12574
ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
12575
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12576
12577
if (!binary_path)
12578
return libbpf_err_ptr(-EINVAL);
12579
12580
/* Check if "binary_path" refers to an archive. */
12581
archive_sep = strstr(binary_path, "!/");
12582
if (archive_sep) {
12583
full_path[0] = '\0';
12584
libbpf_strlcpy(full_path, binary_path,
12585
min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
12586
archive_path = full_path;
12587
binary_path = archive_sep + 2;
12588
} else if (!strchr(binary_path, '/')) {
12589
err = resolve_full_path(binary_path, full_path, sizeof(full_path));
12590
if (err) {
12591
pr_warn("prog '%s': failed to resolve full path for '%s': %s\n",
12592
prog->name, binary_path, errstr(err));
12593
return libbpf_err_ptr(err);
12594
}
12595
binary_path = full_path;
12596
}
12597
func_name = OPTS_GET(opts, func_name, NULL);
12598
if (func_name) {
12599
long sym_off;
12600
12601
if (archive_path) {
12602
sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
12603
func_name);
12604
binary_path = archive_path;
12605
} else {
12606
sym_off = elf_find_func_offset_from_file(binary_path, func_name);
12607
}
12608
if (sym_off < 0)
12609
return libbpf_err_ptr(sym_off);
12610
func_offset += sym_off;
12611
}
12612
12613
legacy = determine_uprobe_perf_type() < 0;
12614
switch (attach_mode) {
12615
case PROBE_ATTACH_MODE_LEGACY:
12616
legacy = true;
12617
pe_opts.force_ioctl_attach = true;
12618
break;
12619
case PROBE_ATTACH_MODE_PERF:
12620
if (legacy)
12621
return libbpf_err_ptr(-ENOTSUP);
12622
pe_opts.force_ioctl_attach = true;
12623
break;
12624
case PROBE_ATTACH_MODE_LINK:
12625
if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
12626
return libbpf_err_ptr(-ENOTSUP);
12627
break;
12628
case PROBE_ATTACH_MODE_DEFAULT:
12629
break;
12630
default:
12631
return libbpf_err_ptr(-EINVAL);
12632
}
12633
12634
if (!legacy) {
12635
pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
12636
func_offset, pid, ref_ctr_off);
12637
} else {
12638
char probe_name[MAX_EVENT_NAME_LEN];
12639
12640
if (ref_ctr_off)
12641
return libbpf_err_ptr(-EINVAL);
12642
12643
gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
12644
strrchr(binary_path, '/') ? : binary_path,
12645
func_offset);
12646
12647
legacy_probe = strdup(probe_name);
12648
if (!legacy_probe)
12649
return libbpf_err_ptr(-ENOMEM);
12650
12651
pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
12652
binary_path, func_offset, pid);
12653
}
12654
if (pfd < 0) {
12655
err = -errno;
12656
pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
12657
prog->name, retprobe ? "uretprobe" : "uprobe",
12658
binary_path, func_offset,
12659
errstr(err));
12660
goto err_out;
12661
}
12662
12663
link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12664
err = libbpf_get_error(link);
12665
if (err) {
12666
close(pfd);
12667
pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
12668
prog->name, retprobe ? "uretprobe" : "uprobe",
12669
binary_path, func_offset,
12670
errstr(err));
12671
goto err_clean_legacy;
12672
}
12673
if (legacy) {
12674
struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
12675
12676
perf_link->legacy_probe_name = legacy_probe;
12677
perf_link->legacy_is_kprobe = false;
12678
perf_link->legacy_is_retprobe = retprobe;
12679
}
12680
return link;
12681
12682
err_clean_legacy:
12683
if (legacy)
12684
remove_uprobe_event_legacy(legacy_probe, retprobe);
12685
err_out:
12686
free(legacy_probe);
12687
return libbpf_err_ptr(err);
12688
}
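
/* Usage sketch (illustrative only; paths and symbol name are examples):
 * attaching a uprobe by symbol name and letting libbpf resolve the ELF
 * offset; pid == -1 means any process, and a bare "libc.so.6" (no '/') is
 * resolved through resolve_full_path():
 *
 *	LIBBPF_OPTS(bpf_uprobe_opts, opts, .func_name = "malloc");
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe_opts(prog, -1, "libc.so.6", 0, &opts);
 *
 * For an uncompressed APK, binary_path can use the "archive!/member" form
 * handled above, e.g. "/data/app/base.apk!/lib/arm64-v8a/libexample.so"
 * (hypothetical path).
 */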
12689
12690
/* Format of u[ret]probe section definition supporting auto-attach:
12691
* u[ret]probe/binary:function[+offset]
12692
*
12693
* binary can be an absolute/relative path or a filename; the latter is resolved to a
12694
* full binary path via bpf_program__attach_uprobe_opts.
12695
*
12696
* Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
12697
* specified (and auto-attach is not possible) or the above format is specified for
12698
* auto-attach.
12699
*/
12700
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12701
{
12702
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
12703
char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
12704
int n, c, ret = -EINVAL;
12705
long offset = 0;
12706
12707
*link = NULL;
12708
12709
n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
12710
&probe_type, &binary_path, &func_name);
12711
switch (n) {
12712
case 1:
12713
/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
12714
ret = 0;
12715
break;
12716
case 2:
12717
pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
12718
prog->name, prog->sec_name);
12719
break;
12720
case 3:
12721
/* Check if the user specified `+offset`; if so, it should be the
 * last part of the string, so make sure sscanf read to EOL.
 */
12724
func_off = strrchr(func_name, '+');
12725
if (func_off) {
12726
n = sscanf(func_off, "+%li%n", &offset, &c);
12727
if (n == 1 && *(func_off + c) == '\0')
12728
func_off[0] = '\0';
12729
else
12730
offset = 0;
12731
}
12732
opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
12733
strcmp(probe_type, "uretprobe.s") == 0;
12734
if (opts.retprobe && offset != 0) {
12735
pr_warn("prog '%s': uretprobes do not support offset specification\n",
12736
prog->name);
12737
break;
12738
}
12739
opts.func_name = func_name;
12740
*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
12741
ret = libbpf_get_error(*link);
12742
break;
12743
default:
12744
pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
12745
prog->sec_name);
12746
break;
12747
}
12748
free(probe_type);
12749
free(binary_path);
12750
free(func_name);
12751
12752
return ret;
12753
}
12754
12755
struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
12756
bool retprobe, pid_t pid,
12757
const char *binary_path,
12758
size_t func_offset)
12759
{
12760
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
12761
12762
return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
12763
}
12764
12765
struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
12766
pid_t pid, const char *binary_path,
12767
const char *usdt_provider, const char *usdt_name,
12768
const struct bpf_usdt_opts *opts)
12769
{
12770
char resolved_path[512];
12771
struct bpf_object *obj = prog->obj;
12772
struct bpf_link *link;
12773
__u64 usdt_cookie;
12774
int err;
12775
12776
if (!OPTS_VALID(opts, bpf_uprobe_opts))
12777
return libbpf_err_ptr(-EINVAL);
12778
12779
if (bpf_program__fd(prog) < 0) {
12780
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12781
prog->name);
12782
return libbpf_err_ptr(-EINVAL);
12783
}
12784
12785
if (!binary_path)
12786
return libbpf_err_ptr(-EINVAL);
12787
12788
if (!strchr(binary_path, '/')) {
12789
err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
12790
if (err) {
12791
pr_warn("prog '%s': failed to resolve full path for '%s': %s\n",
12792
prog->name, binary_path, errstr(err));
12793
return libbpf_err_ptr(err);
12794
}
12795
binary_path = resolved_path;
12796
}
12797
12798
/* USDT manager is instantiated lazily on first USDT attach. It will
12799
* be destroyed together with BPF object in bpf_object__close().
12800
*/
12801
if (IS_ERR(obj->usdt_man))
12802
return libbpf_ptr(obj->usdt_man);
12803
if (!obj->usdt_man) {
12804
obj->usdt_man = usdt_manager_new(obj);
12805
if (IS_ERR(obj->usdt_man))
12806
return libbpf_ptr(obj->usdt_man);
12807
}
12808
12809
usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
12810
link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
12811
usdt_provider, usdt_name, usdt_cookie);
12812
err = libbpf_get_error(link);
12813
if (err)
12814
return libbpf_err_ptr(err);
12815
return link;
12816
}
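
/* Usage sketch (illustrative only; the binary, provider, and probe names
 * are examples): attaching to a USDT probe with a user-supplied cookie:
 *
 *	LIBBPF_OPTS(bpf_usdt_opts, opts, .usdt_cookie = 0xbeef);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_usdt(prog, -1, "/usr/sbin/mysqld",
 *					"mysql", "query__start", &opts);
 */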
12817
12818
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12819
{
12820
char *path = NULL, *provider = NULL, *name = NULL;
12821
const char *sec_name;
12822
int n, err;
12823
12824
sec_name = bpf_program__section_name(prog);
12825
if (strcmp(sec_name, "usdt") == 0) {
12826
/* no auto-attach for just SEC("usdt") */
12827
*link = NULL;
12828
return 0;
12829
}
12830
12831
n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
12832
if (n != 3) {
12833
pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
12834
sec_name);
12835
err = -EINVAL;
12836
} else {
12837
*link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
12838
provider, name, NULL);
12839
err = libbpf_get_error(*link);
12840
}
12841
free(path);
12842
free(provider);
12843
free(name);
12844
return err;
12845
}
12846
12847
static int determine_tracepoint_id(const char *tp_category,
12848
const char *tp_name)
12849
{
12850
char file[PATH_MAX];
12851
int ret;
12852
12853
ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
12854
tracefs_path(), tp_category, tp_name);
12855
if (ret < 0)
12856
return -errno;
12857
if (ret >= sizeof(file)) {
12858
pr_debug("tracepoint %s/%s path is too long\n",
12859
tp_category, tp_name);
12860
return -E2BIG;
12861
}
12862
return parse_uint_from_file(file, "%d\n");
12863
}
12864
12865
static int perf_event_open_tracepoint(const char *tp_category,
12866
const char *tp_name)
12867
{
12868
const size_t attr_sz = sizeof(struct perf_event_attr);
12869
struct perf_event_attr attr;
12870
int tp_id, pfd, err;
12871
12872
tp_id = determine_tracepoint_id(tp_category, tp_name);
12873
if (tp_id < 0) {
12874
pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
12875
tp_category, tp_name,
12876
errstr(tp_id));
12877
return tp_id;
12878
}
12879
12880
memset(&attr, 0, attr_sz);
12881
attr.type = PERF_TYPE_TRACEPOINT;
12882
attr.size = attr_sz;
12883
attr.config = tp_id;
12884
12885
pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
12886
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12887
if (pfd < 0) {
12888
err = -errno;
12889
pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
12890
tp_category, tp_name,
12891
errstr(err));
12892
return err;
12893
}
12894
return pfd;
12895
}
12896
12897
struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
12898
const char *tp_category,
12899
const char *tp_name,
12900
const struct bpf_tracepoint_opts *opts)
12901
{
12902
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12903
struct bpf_link *link;
12904
int pfd, err;
12905
12906
if (!OPTS_VALID(opts, bpf_tracepoint_opts))
12907
return libbpf_err_ptr(-EINVAL);
12908
12909
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12910
12911
pfd = perf_event_open_tracepoint(tp_category, tp_name);
12912
if (pfd < 0) {
12913
pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
12914
prog->name, tp_category, tp_name,
12915
errstr(pfd));
12916
return libbpf_err_ptr(pfd);
12917
}
12918
link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12919
err = libbpf_get_error(link);
12920
if (err) {
12921
close(pfd);
12922
pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
12923
prog->name, tp_category, tp_name,
12924
errstr(err));
12925
return libbpf_err_ptr(err);
12926
}
12927
return link;
12928
}
12929
12930
struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
12931
const char *tp_category,
12932
const char *tp_name)
12933
{
12934
return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
12935
}
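/* Usage sketch (illustrative): attach a loaded program to the
 * syscalls/sys_enter_openat tracepoint; requires tracefs to be mounted so the
 * tracepoint ID can be read from .../events/syscalls/sys_enter_openat/id.
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tracepoint(prog, "syscalls", "sys_enter_openat");
 *	if (!link)
 *		fprintf(stderr, "tracepoint attach failed: %d\n", -errno);
 */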
12936
12937
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12938
{
12939
char *sec_name, *tp_cat, *tp_name;
12940
12941
*link = NULL;
12942
12943
/* no auto-attach for SEC("tp") or SEC("tracepoint") */
12944
if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
12945
return 0;
12946
12947
sec_name = strdup(prog->sec_name);
12948
if (!sec_name)
12949
return -ENOMEM;
12950
12951
/* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
12952
if (str_has_pfx(prog->sec_name, "tp/"))
12953
tp_cat = sec_name + sizeof("tp/") - 1;
12954
else
12955
tp_cat = sec_name + sizeof("tracepoint/") - 1;
12956
tp_name = strchr(tp_cat, '/');
12957
if (!tp_name) {
12958
free(sec_name);
12959
return -EINVAL;
12960
}
12961
*tp_name = '\0';
12962
tp_name++;
12963
12964
*link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
12965
free(sec_name);
12966
return libbpf_get_error(*link);
12967
}
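/* Example of an auto-attachable section definition parsed above (the
 * category/name pair is illustrative; the context type assumes vmlinux.h):
 *
 *	SEC("tp/sched/sched_switch")
 *	int handle_switch(struct trace_event_raw_sched_switch *ctx)
 *	{
 *		return 0;
 *	}
 */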
12968
12969
struct bpf_link *
12970
bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog,
12971
const char *tp_name,
12972
struct bpf_raw_tracepoint_opts *opts)
12973
{
12974
LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts);
12975
struct bpf_link *link;
12976
int prog_fd, pfd;
12977
12978
if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts))
12979
return libbpf_err_ptr(-EINVAL);
12980
12981
prog_fd = bpf_program__fd(prog);
12982
if (prog_fd < 0) {
12983
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12984
return libbpf_err_ptr(-EINVAL);
12985
}
12986
12987
link = calloc(1, sizeof(*link));
12988
if (!link)
12989
return libbpf_err_ptr(-ENOMEM);
12990
link->detach = &bpf_link__detach_fd;
12991
12992
raw_opts.tp_name = tp_name;
12993
raw_opts.cookie = OPTS_GET(opts, cookie, 0);
12994
pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts);
12995
if (pfd < 0) {
12996
pfd = -errno;
12997
free(link);
12998
pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
12999
prog->name, tp_name, errstr(pfd));
13000
return libbpf_err_ptr(pfd);
13001
}
13002
link->fd = pfd;
13003
return link;
13004
}
13005
13006
struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
13007
const char *tp_name)
13008
{
13009
return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL);
13010
}
13011
13012
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
13013
{
13014
static const char *const prefixes[] = {
13015
"raw_tp",
13016
"raw_tracepoint",
13017
"raw_tp.w",
13018
"raw_tracepoint.w",
13019
};
13020
size_t i;
13021
const char *tp_name = NULL;
13022
13023
*link = NULL;
13024
13025
for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
13026
size_t pfx_len;
13027
13028
if (!str_has_pfx(prog->sec_name, prefixes[i]))
13029
continue;
13030
13031
pfx_len = strlen(prefixes[i]);
13032
/* no auto-attach for a bare section name, e.g., SEC("raw_tp") */
13033
if (prog->sec_name[pfx_len] == '\0')
13034
return 0;
13035
13036
if (prog->sec_name[pfx_len] != '/')
13037
continue;
13038
13039
tp_name = prog->sec_name + pfx_len + 1;
13040
break;
13041
}
13042
13043
if (!tp_name) {
13044
pr_warn("prog '%s': invalid section name '%s'\n",
13045
prog->name, prog->sec_name);
13046
return -EINVAL;
13047
}
13048
13049
*link = bpf_program__attach_raw_tracepoint(prog, tp_name);
13050
return libbpf_get_error(*link);
13051
}
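/* Examples of section definitions handled above (tracepoint names are
 * illustrative):
 *
 *	SEC("raw_tp/sched_switch")	auto-attaches to the raw tracepoint
 *	SEC("raw_tracepoint.w/...")	writable-context variant
 *	SEC("raw_tp")			loads, but must be attached manually
 */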
13052
13053
/* Common logic for all BPF program types that attach to a btf_id */
13054
static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
13055
const struct bpf_trace_opts *opts)
13056
{
13057
LIBBPF_OPTS(bpf_link_create_opts, link_opts);
13058
struct bpf_link *link;
13059
int prog_fd, pfd;
13060
13061
if (!OPTS_VALID(opts, bpf_trace_opts))
13062
return libbpf_err_ptr(-EINVAL);
13063
13064
prog_fd = bpf_program__fd(prog);
13065
if (prog_fd < 0) {
13066
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13067
return libbpf_err_ptr(-EINVAL);
13068
}
13069
13070
link = calloc(1, sizeof(*link));
13071
if (!link)
13072
return libbpf_err_ptr(-ENOMEM);
13073
link->detach = &bpf_link__detach_fd;
13074
13075
/* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
13076
link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
13077
pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
13078
if (pfd < 0) {
13079
pfd = -errno;
13080
free(link);
13081
pr_warn("prog '%s': failed to attach: %s\n",
13082
prog->name, errstr(pfd));
13083
return libbpf_err_ptr(pfd);
13084
}
13085
link->fd = pfd;
13086
return link;
13087
}
13088
13089
struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
13090
{
13091
return bpf_program__attach_btf_id(prog, NULL);
13092
}
13093
13094
struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
13095
const struct bpf_trace_opts *opts)
13096
{
13097
return bpf_program__attach_btf_id(prog, opts);
13098
}
13099
13100
struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
13101
{
13102
return bpf_program__attach_btf_id(prog, NULL);
13103
}
13104
13105
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
13106
{
13107
*link = bpf_program__attach_trace(prog);
13108
return libbpf_get_error(*link);
13109
}
13110
13111
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
13112
{
13113
*link = bpf_program__attach_lsm(prog);
13114
return libbpf_get_error(*link);
13115
}
13116
13117
static struct bpf_link *
13118
bpf_program_attach_fd(const struct bpf_program *prog,
13119
int target_fd, const char *target_name,
13120
const struct bpf_link_create_opts *opts)
13121
{
13122
enum bpf_attach_type attach_type;
13123
struct bpf_link *link;
13124
int prog_fd, link_fd;
13125
13126
prog_fd = bpf_program__fd(prog);
13127
if (prog_fd < 0) {
13128
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13129
return libbpf_err_ptr(-EINVAL);
13130
}
13131
13132
link = calloc(1, sizeof(*link));
13133
if (!link)
13134
return libbpf_err_ptr(-ENOMEM);
13135
link->detach = &bpf_link__detach_fd;
13136
13137
attach_type = bpf_program__expected_attach_type(prog);
13138
link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
13139
if (link_fd < 0) {
13140
link_fd = -errno;
13141
free(link);
13142
pr_warn("prog '%s': failed to attach to %s: %s\n",
13143
prog->name, target_name,
13144
errstr(link_fd));
13145
return libbpf_err_ptr(link_fd);
13146
}
13147
link->fd = link_fd;
13148
return link;
13149
}
13150
13151
struct bpf_link *
13152
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
13153
{
13154
return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
13155
}
13156
13157
struct bpf_link *
13158
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
13159
{
13160
return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
13161
}
13162
13163
struct bpf_link *
13164
bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd)
13165
{
13166
return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL);
13167
}
13168
13169
struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
13170
{
13171
/* target_fd/target_ifindex use the same field in LINK_CREATE */
13172
return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
13173
}
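/* Usage sketch (interface name illustrative; if_nametoindex() is from
 * <net/if.h>): attach a loaded SEC("xdp") program to a network device.
 *
 *	int ifindex = if_nametoindex("eth0");
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_xdp(prog, ifindex);
 *	if (!link)
 *		fprintf(stderr, "XDP attach failed: %d\n", -errno);
 */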
13174
13175
struct bpf_link *
13176
bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd,
13177
const struct bpf_cgroup_opts *opts)
13178
{
13179
LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13180
__u32 relative_id;
13181
int relative_fd;
13182
13183
if (!OPTS_VALID(opts, bpf_cgroup_opts))
13184
return libbpf_err_ptr(-EINVAL);
13185
13186
relative_id = OPTS_GET(opts, relative_id, 0);
13187
relative_fd = OPTS_GET(opts, relative_fd, 0);
13188
13189
if (relative_fd && relative_id) {
13190
pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
13191
prog->name);
13192
return libbpf_err_ptr(-EINVAL);
13193
}
13194
13195
link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0);
13196
link_create_opts.cgroup.relative_fd = relative_fd;
13197
link_create_opts.cgroup.relative_id = relative_id;
13198
link_create_opts.flags = OPTS_GET(opts, flags, 0);
13199
13200
return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts);
13201
}
13202
13203
struct bpf_link *
13204
bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
13205
const struct bpf_tcx_opts *opts)
13206
{
13207
LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13208
__u32 relative_id;
13209
int relative_fd;
13210
13211
if (!OPTS_VALID(opts, bpf_tcx_opts))
13212
return libbpf_err_ptr(-EINVAL);
13213
13214
relative_id = OPTS_GET(opts, relative_id, 0);
13215
relative_fd = OPTS_GET(opts, relative_fd, 0);
13216
13217
/* validate we don't have unexpected combinations of non-zero fields */
13218
if (!ifindex) {
13219
pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
13220
prog->name);
13221
return libbpf_err_ptr(-EINVAL);
13222
}
13223
if (relative_fd && relative_id) {
13224
pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
13225
prog->name);
13226
return libbpf_err_ptr(-EINVAL);
13227
}
13228
13229
link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
13230
link_create_opts.tcx.relative_fd = relative_fd;
13231
link_create_opts.tcx.relative_id = relative_id;
13232
link_create_opts.flags = OPTS_GET(opts, flags, 0);
13233
13234
/* target_fd/target_ifindex use the same field in LINK_CREATE */
13235
return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
13236
}
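/* Usage sketch (illustrative): attach a loaded SEC("tcx/ingress") program to
 * a device. Passing NULL opts appends with default ordering; relative_fd,
 * relative_id and flags such as BPF_F_BEFORE (from <linux/bpf.h>) can be used
 * to anchor the program relative to an existing one.
 *
 *	LIBBPF_OPTS(bpf_tcx_opts, opts);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tcx(prog, if_nametoindex("eth0"), &opts);
 *	if (!link)
 *		fprintf(stderr, "tcx attach failed: %d\n", -errno);
 */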
13237
13238
struct bpf_link *
13239
bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
13240
const struct bpf_netkit_opts *opts)
13241
{
13242
LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13243
__u32 relative_id;
13244
int relative_fd;
13245
13246
if (!OPTS_VALID(opts, bpf_netkit_opts))
13247
return libbpf_err_ptr(-EINVAL);
13248
13249
relative_id = OPTS_GET(opts, relative_id, 0);
13250
relative_fd = OPTS_GET(opts, relative_fd, 0);
13251
13252
/* validate we don't have unexpected combinations of non-zero fields */
13253
if (!ifindex) {
13254
pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
13255
prog->name);
13256
return libbpf_err_ptr(-EINVAL);
13257
}
13258
if (relative_fd && relative_id) {
13259
pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
13260
prog->name);
13261
return libbpf_err_ptr(-EINVAL);
13262
}
13263
13264
link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
13265
link_create_opts.netkit.relative_fd = relative_fd;
13266
link_create_opts.netkit.relative_id = relative_id;
13267
link_create_opts.flags = OPTS_GET(opts, flags, 0);
13268
13269
return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
13270
}
13271
13272
struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
13273
int target_fd,
13274
const char *attach_func_name)
13275
{
13276
int btf_id;
13277
13278
if (!!target_fd != !!attach_func_name) {
13279
pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
13280
prog->name);
13281
return libbpf_err_ptr(-EINVAL);
13282
}
13283
13284
if (prog->type != BPF_PROG_TYPE_EXT) {
13285
pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
13286
prog->name);
13287
return libbpf_err_ptr(-EINVAL);
13288
}
13289
13290
if (target_fd) {
13291
LIBBPF_OPTS(bpf_link_create_opts, target_opts);
13292
13293
btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd);
13294
if (btf_id < 0)
13295
return libbpf_err_ptr(btf_id);
13296
13297
target_opts.target_btf_id = btf_id;
13298
13299
return bpf_program_attach_fd(prog, target_fd, "freplace",
13300
&target_opts);
13301
} else {
13302
/* no target, so use raw_tracepoint_open for compatibility
13303
* with old kernels
13304
*/
13305
return bpf_program__attach_trace(prog);
13306
}
13307
}
13308
13309
struct bpf_link *
13310
bpf_program__attach_iter(const struct bpf_program *prog,
13311
const struct bpf_iter_attach_opts *opts)
13312
{
13313
DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13314
struct bpf_link *link;
13315
int prog_fd, link_fd;
13316
__u32 target_fd = 0;
13317
13318
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
13319
return libbpf_err_ptr(-EINVAL);
13320
13321
link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
13322
link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
13323
13324
prog_fd = bpf_program__fd(prog);
13325
if (prog_fd < 0) {
13326
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13327
return libbpf_err_ptr(-EINVAL);
13328
}
13329
13330
link = calloc(1, sizeof(*link));
13331
if (!link)
13332
return libbpf_err_ptr(-ENOMEM);
13333
link->detach = &bpf_link__detach_fd;
13334
13335
link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
13336
&link_create_opts);
13337
if (link_fd < 0) {
13338
link_fd = -errno;
13339
free(link);
13340
pr_warn("prog '%s': failed to attach to iterator: %s\n",
13341
prog->name, errstr(link_fd));
13342
return libbpf_err_ptr(link_fd);
13343
}
13344
link->fd = link_fd;
13345
return link;
13346
}
13347
13348
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
13349
{
13350
*link = bpf_program__attach_iter(prog, NULL);
13351
return libbpf_get_error(*link);
13352
}
13353
13354
struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
13355
const struct bpf_netfilter_opts *opts)
13356
{
13357
LIBBPF_OPTS(bpf_link_create_opts, lopts);
13358
struct bpf_link *link;
13359
int prog_fd, link_fd;
13360
13361
if (!OPTS_VALID(opts, bpf_netfilter_opts))
13362
return libbpf_err_ptr(-EINVAL);
13363
13364
prog_fd = bpf_program__fd(prog);
13365
if (prog_fd < 0) {
13366
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13367
return libbpf_err_ptr(-EINVAL);
13368
}
13369
13370
link = calloc(1, sizeof(*link));
13371
if (!link)
13372
return libbpf_err_ptr(-ENOMEM);
13373
13374
link->detach = &bpf_link__detach_fd;
13375
13376
lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
13377
lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
13378
lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
13379
lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
13380
13381
link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
13382
if (link_fd < 0) {
13383
link_fd = -errno;
13384
free(link);
13385
pr_warn("prog '%s': failed to attach to netfilter: %s\n",
13386
prog->name, errstr(link_fd));
13387
return libbpf_err_ptr(link_fd);
13388
}
13389
link->fd = link_fd;
13390
13391
return link;
13392
}
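/* Usage sketch (values illustrative): hook a loaded SEC("netfilter") program
 * into the IPv4 LOCAL_IN chain. NFPROTO_IPV4 and NF_INET_LOCAL_IN come from
 * <linux/netfilter.h>.
 *
 *	LIBBPF_OPTS(bpf_netfilter_opts, opts,
 *		.pf = NFPROTO_IPV4,
 *		.hooknum = NF_INET_LOCAL_IN,
 *		.priority = -128,
 *	);
 *	struct bpf_link *link = bpf_program__attach_netfilter(prog, &opts);
 */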
13393
13394
struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
13395
{
13396
struct bpf_link *link = NULL;
13397
int err;
13398
13399
if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
13400
return libbpf_err_ptr(-EOPNOTSUPP);
13401
13402
if (bpf_program__fd(prog) < 0) {
13403
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
13404
prog->name);
13405
return libbpf_err_ptr(-EINVAL);
13406
}
13407
13408
err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
13409
if (err)
13410
return libbpf_err_ptr(err);
13411
13412
/* When calling bpf_program__attach() explicitly, auto-attach support
13413
* is expected to work, so a NULL returned link is considered an error.
13414
* This is different for skeleton's attach, see comment in
13415
* bpf_object__attach_skeleton().
13416
*/
13417
if (!link)
13418
return libbpf_err_ptr(-EOPNOTSUPP);
13419
13420
return link;
13421
}
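/* Usage sketch: generic attach that picks the right mechanism from the
 * program's SEC() definition (the prog variable is assumed to come from a
 * loaded bpf_object):
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *
 *	if (!link)
 *		fprintf(stderr, "auto-attach failed: %d\n", -errno);
 *	...
 *	bpf_link__destroy(link);
 */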
13422
13423
struct bpf_link_struct_ops {
13424
struct bpf_link link;
13425
int map_fd;
13426
};
13427
13428
static int bpf_link__detach_struct_ops(struct bpf_link *link)
13429
{
13430
struct bpf_link_struct_ops *st_link;
13431
__u32 zero = 0;
13432
13433
st_link = container_of(link, struct bpf_link_struct_ops, link);
13434
13435
if (st_link->map_fd < 0)
13436
/* w/o a real link */
13437
return bpf_map_delete_elem(link->fd, &zero);
13438
13439
return close(link->fd);
13440
}
13441
13442
struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
13443
{
13444
struct bpf_link_struct_ops *link;
13445
__u32 zero = 0;
13446
int err, fd;
13447
13448
if (!bpf_map__is_struct_ops(map)) {
13449
pr_warn("map '%s': can't attach non-struct_ops map\n", map->name);
13450
return libbpf_err_ptr(-EINVAL);
13451
}
13452
13453
if (map->fd < 0) {
13454
pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name);
13455
return libbpf_err_ptr(-EINVAL);
13456
}
13457
13458
link = calloc(1, sizeof(*link));
13459
if (!link)
13460
return libbpf_err_ptr(-ENOMEM);
13461
13462
/* kern_vdata should be prepared during the loading phase. */
13463
err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
13464
/* It can be EBUSY if the map has been used to create or
13465
* update a link before. We don't allow updating the value of
13466
* a struct_ops once it is set. That ensures that the value
13467
* never changes. So, it is safe to skip EBUSY.
13468
*/
13469
if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
13470
free(link);
13471
return libbpf_err_ptr(err);
13472
}
13473
13474
link->link.detach = bpf_link__detach_struct_ops;
13475
13476
if (!(map->def.map_flags & BPF_F_LINK)) {
13477
/* w/o a real link */
13478
link->link.fd = map->fd;
13479
link->map_fd = -1;
13480
return &link->link;
13481
}
13482
13483
fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
13484
if (fd < 0) {
13485
free(link);
13486
return libbpf_err_ptr(fd);
13487
}
13488
13489
link->link.fd = fd;
13490
link->map_fd = map->fd;
13491
13492
return &link->link;
13493
}
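/* Usage sketch (map name hypothetical): register a struct_ops map after the
 * object has been loaded. With BPF_F_LINK maps (SEC(".struct_ops.link")) this
 * creates a real BPF link that can later be pinned or updated via
 * bpf_link__update_map().
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_ops");
 *	struct bpf_link *link = bpf_map__attach_struct_ops(map);
 *
 *	if (!link)
 *		fprintf(stderr, "struct_ops attach failed: %d\n", -errno);
 */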
13494
13495
/*
13496
* Swap the backing struct_ops map of a link with a new struct_ops map.
13497
*/
13498
int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
13499
{
13500
struct bpf_link_struct_ops *st_ops_link;
13501
__u32 zero = 0;
13502
int err;
13503
13504
if (!bpf_map__is_struct_ops(map))
13505
return libbpf_err(-EINVAL);
13506
13507
if (map->fd < 0) {
13508
pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
13509
return libbpf_err(-EINVAL);
13510
}
13511
13512
st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
13513
/* Ensure the type of a link is correct */
13514
if (st_ops_link->map_fd < 0)
13515
return libbpf_err(-EINVAL);
13516
13517
err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
13518
/* It can be EBUSY if the map has been used to create or
13519
* update a link before. We don't allow updating the value of
13520
* a struct_ops once it is set. That ensures that the value
13521
* never changes. So, it is safe to skip EBUSY.
13522
*/
13523
if (err && err != -EBUSY)
13524
return err;
13525
13526
err = bpf_link_update(link->fd, map->fd, NULL);
13527
if (err < 0)
13528
return err;
13529
13530
st_ops_link->map_fd = map->fd;
13531
13532
return 0;
13533
}
13534
13535
typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
13536
void *private_data);
13537
13538
static enum bpf_perf_event_ret
13539
perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
13540
void **copy_mem, size_t *copy_size,
13541
bpf_perf_event_print_t fn, void *private_data)
13542
{
13543
struct perf_event_mmap_page *header = mmap_mem;
13544
__u64 data_head = ring_buffer_read_head(header);
13545
__u64 data_tail = header->data_tail;
13546
void *base = ((__u8 *)header) + page_size;
13547
int ret = LIBBPF_PERF_EVENT_CONT;
13548
struct perf_event_header *ehdr;
13549
size_t ehdr_size;
13550
13551
while (data_head != data_tail) {
13552
ehdr = base + (data_tail & (mmap_size - 1));
13553
ehdr_size = ehdr->size;
13554
13555
if (((void *)ehdr) + ehdr_size > base + mmap_size) {
13556
void *copy_start = ehdr;
13557
size_t len_first = base + mmap_size - copy_start;
13558
size_t len_secnd = ehdr_size - len_first;
13559
13560
if (*copy_size < ehdr_size) {
13561
free(*copy_mem);
13562
*copy_mem = malloc(ehdr_size);
13563
if (!*copy_mem) {
13564
*copy_size = 0;
13565
ret = LIBBPF_PERF_EVENT_ERROR;
13566
break;
13567
}
13568
*copy_size = ehdr_size;
13569
}
13570
13571
memcpy(*copy_mem, copy_start, len_first);
13572
memcpy(*copy_mem + len_first, base, len_secnd);
13573
ehdr = *copy_mem;
13574
}
13575
13576
ret = fn(ehdr, private_data);
13577
data_tail += ehdr_size;
13578
if (ret != LIBBPF_PERF_EVENT_CONT)
13579
break;
13580
}
13581
13582
ring_buffer_write_tail(header, data_tail);
13583
return libbpf_err(ret);
13584
}
13585
13586
struct perf_buffer;
13587
13588
struct perf_buffer_params {
13589
struct perf_event_attr *attr;
13590
/* if event_cb is specified, it takes precedence */
13591
perf_buffer_event_fn event_cb;
13592
/* sample_cb and lost_cb are higher-level common-case callbacks */
13593
perf_buffer_sample_fn sample_cb;
13594
perf_buffer_lost_fn lost_cb;
13595
void *ctx;
13596
int cpu_cnt;
13597
int *cpus;
13598
int *map_keys;
13599
};
13600
13601
struct perf_cpu_buf {
13602
struct perf_buffer *pb;
13603
void *base; /* mmap()'ed memory */
13604
void *buf; /* for reconstructing segmented data */
13605
size_t buf_size;
13606
int fd;
13607
int cpu;
13608
int map_key;
13609
};
13610
13611
struct perf_buffer {
13612
perf_buffer_event_fn event_cb;
13613
perf_buffer_sample_fn sample_cb;
13614
perf_buffer_lost_fn lost_cb;
13615
void *ctx; /* passed into callbacks */
13616
13617
size_t page_size;
13618
size_t mmap_size;
13619
struct perf_cpu_buf **cpu_bufs;
13620
struct epoll_event *events;
13621
int cpu_cnt; /* number of allocated CPU buffers */
13622
int epoll_fd; /* epoll instance FD */
13623
int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
13624
};
13625
13626
static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
13627
struct perf_cpu_buf *cpu_buf)
13628
{
13629
if (!cpu_buf)
13630
return;
13631
if (cpu_buf->base &&
13632
munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
13633
pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
13634
if (cpu_buf->fd >= 0) {
13635
ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
13636
close(cpu_buf->fd);
13637
}
13638
free(cpu_buf->buf);
13639
free(cpu_buf);
13640
}
13641
13642
void perf_buffer__free(struct perf_buffer *pb)
13643
{
13644
int i;
13645
13646
if (IS_ERR_OR_NULL(pb))
13647
return;
13648
if (pb->cpu_bufs) {
13649
for (i = 0; i < pb->cpu_cnt; i++) {
13650
struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13651
13652
if (!cpu_buf)
13653
continue;
13654
13655
bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
13656
perf_buffer__free_cpu_buf(pb, cpu_buf);
13657
}
13658
free(pb->cpu_bufs);
13659
}
13660
if (pb->epoll_fd >= 0)
13661
close(pb->epoll_fd);
13662
free(pb->events);
13663
free(pb);
13664
}
13665
13666
static struct perf_cpu_buf *
13667
perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
13668
int cpu, int map_key)
13669
{
13670
struct perf_cpu_buf *cpu_buf;
13671
int err;
13672
13673
cpu_buf = calloc(1, sizeof(*cpu_buf));
13674
if (!cpu_buf)
13675
return ERR_PTR(-ENOMEM);
13676
13677
cpu_buf->pb = pb;
13678
cpu_buf->cpu = cpu;
13679
cpu_buf->map_key = map_key;
13680
13681
cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
13682
-1, PERF_FLAG_FD_CLOEXEC);
13683
if (cpu_buf->fd < 0) {
13684
err = -errno;
13685
pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
13686
cpu, errstr(err));
13687
goto error;
13688
}
13689
13690
cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
13691
PROT_READ | PROT_WRITE, MAP_SHARED,
13692
cpu_buf->fd, 0);
13693
if (cpu_buf->base == MAP_FAILED) {
13694
cpu_buf->base = NULL;
13695
err = -errno;
13696
pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
13697
cpu, errstr(err));
13698
goto error;
13699
}
13700
13701
if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
13702
err = -errno;
13703
pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
13704
cpu, errstr(err));
13705
goto error;
13706
}
13707
13708
return cpu_buf;
13709
13710
error:
13711
perf_buffer__free_cpu_buf(pb, cpu_buf);
13712
return (struct perf_cpu_buf *)ERR_PTR(err);
13713
}
13714
13715
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13716
struct perf_buffer_params *p);
13717
13718
struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
13719
perf_buffer_sample_fn sample_cb,
13720
perf_buffer_lost_fn lost_cb,
13721
void *ctx,
13722
const struct perf_buffer_opts *opts)
13723
{
13724
const size_t attr_sz = sizeof(struct perf_event_attr);
13725
struct perf_buffer_params p = {};
13726
struct perf_event_attr attr;
13727
__u32 sample_period;
13728
13729
if (!OPTS_VALID(opts, perf_buffer_opts))
13730
return libbpf_err_ptr(-EINVAL);
13731
13732
sample_period = OPTS_GET(opts, sample_period, 1);
13733
if (!sample_period)
13734
sample_period = 1;
13735
13736
memset(&attr, 0, attr_sz);
13737
attr.size = attr_sz;
13738
attr.config = PERF_COUNT_SW_BPF_OUTPUT;
13739
attr.type = PERF_TYPE_SOFTWARE;
13740
attr.sample_type = PERF_SAMPLE_RAW;
13741
attr.wakeup_events = sample_period;
13742
13743
p.attr = &attr;
13744
p.sample_cb = sample_cb;
13745
p.lost_cb = lost_cb;
13746
p.ctx = ctx;
13747
13748
return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13749
}
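/* Usage sketch (map and callback names hypothetical): consume samples that
 * BPF code emits with bpf_perf_event_output() into a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY map. page_cnt must be a power of two.
 *
 *	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		...
 *	}
 *
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(bpf_map__fd(map), 64, on_sample, NULL, NULL, NULL);
 *	if (!pb)
 *		return -errno;
 *	while (!exiting)
 *		perf_buffer__poll(pb, 100);
 *	perf_buffer__free(pb);
 */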
13750
13751
struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
13752
struct perf_event_attr *attr,
13753
perf_buffer_event_fn event_cb, void *ctx,
13754
const struct perf_buffer_raw_opts *opts)
13755
{
13756
struct perf_buffer_params p = {};
13757
13758
if (!attr)
13759
return libbpf_err_ptr(-EINVAL);
13760
13761
if (!OPTS_VALID(opts, perf_buffer_raw_opts))
13762
return libbpf_err_ptr(-EINVAL);
13763
13764
p.attr = attr;
13765
p.event_cb = event_cb;
13766
p.ctx = ctx;
13767
p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
13768
p.cpus = OPTS_GET(opts, cpus, NULL);
13769
p.map_keys = OPTS_GET(opts, map_keys, NULL);
13770
13771
return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13772
}
13773
13774
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13775
struct perf_buffer_params *p)
13776
{
13777
const char *online_cpus_file = "/sys/devices/system/cpu/online";
13778
struct bpf_map_info map;
13779
struct perf_buffer *pb;
13780
bool *online = NULL;
13781
__u32 map_info_len;
13782
int err, i, j, n;
13783
13784
if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
13785
pr_warn("page count should be power of two, but is %zu\n",
13786
page_cnt);
13787
return ERR_PTR(-EINVAL);
13788
}
13789
13790
/* best-effort sanity checks */
13791
memset(&map, 0, sizeof(map));
13792
map_info_len = sizeof(map);
13793
err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
13794
if (err) {
13795
err = -errno;
13796
/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
13797
* -EBADFD, -EFAULT, or -E2BIG on real error
13798
*/
13799
if (err != -EINVAL) {
13800
pr_warn("failed to get map info for map FD %d: %s\n",
13801
map_fd, errstr(err));
13802
return ERR_PTR(err);
13803
}
13804
pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
13805
map_fd);
13806
} else {
13807
if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
13808
pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
13809
map.name);
13810
return ERR_PTR(-EINVAL);
13811
}
13812
}
13813
13814
pb = calloc(1, sizeof(*pb));
13815
if (!pb)
13816
return ERR_PTR(-ENOMEM);
13817
13818
pb->event_cb = p->event_cb;
13819
pb->sample_cb = p->sample_cb;
13820
pb->lost_cb = p->lost_cb;
13821
pb->ctx = p->ctx;
13822
13823
pb->page_size = getpagesize();
13824
pb->mmap_size = pb->page_size * page_cnt;
13825
pb->map_fd = map_fd;
13826
13827
pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
13828
if (pb->epoll_fd < 0) {
13829
err = -errno;
13830
pr_warn("failed to create epoll instance: %s\n",
13831
errstr(err));
13832
goto error;
13833
}
13834
13835
if (p->cpu_cnt > 0) {
13836
pb->cpu_cnt = p->cpu_cnt;
13837
} else {
13838
pb->cpu_cnt = libbpf_num_possible_cpus();
13839
if (pb->cpu_cnt < 0) {
13840
err = pb->cpu_cnt;
13841
goto error;
13842
}
13843
if (map.max_entries && map.max_entries < pb->cpu_cnt)
13844
pb->cpu_cnt = map.max_entries;
13845
}
13846
13847
pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
13848
if (!pb->events) {
13849
err = -ENOMEM;
13850
pr_warn("failed to allocate events: out of memory\n");
13851
goto error;
13852
}
13853
pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
13854
if (!pb->cpu_bufs) {
13855
err = -ENOMEM;
13856
pr_warn("failed to allocate buffers: out of memory\n");
13857
goto error;
13858
}
13859
13860
err = parse_cpu_mask_file(online_cpus_file, &online, &n);
13861
if (err) {
13862
pr_warn("failed to get online CPU mask: %s\n", errstr(err));
13863
goto error;
13864
}
13865
13866
for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
13867
struct perf_cpu_buf *cpu_buf;
13868
int cpu, map_key;
13869
13870
cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
13871
map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
13872
13873
/* in case the user didn't explicitly request particular CPUs to
13874
* be attached to, skip offline/not present CPUs
13875
*/
13876
if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
13877
continue;
13878
13879
cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
13880
if (IS_ERR(cpu_buf)) {
13881
err = PTR_ERR(cpu_buf);
13882
goto error;
13883
}
13884
13885
pb->cpu_bufs[j] = cpu_buf;
13886
13887
err = bpf_map_update_elem(pb->map_fd, &map_key,
13888
&cpu_buf->fd, 0);
13889
if (err) {
13890
err = -errno;
13891
pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
13892
cpu, map_key, cpu_buf->fd,
13893
errstr(err));
13894
goto error;
13895
}
13896
13897
pb->events[j].events = EPOLLIN;
13898
pb->events[j].data.ptr = cpu_buf;
13899
if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
13900
&pb->events[j]) < 0) {
13901
err = -errno;
13902
pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
13903
cpu, cpu_buf->fd,
13904
errstr(err));
13905
goto error;
13906
}
13907
j++;
13908
}
13909
pb->cpu_cnt = j;
13910
free(online);
13911
13912
return pb;
13913
13914
error:
13915
free(online);
13916
if (pb)
13917
perf_buffer__free(pb);
13918
return ERR_PTR(err);
13919
}
13920
13921
struct perf_sample_raw {
13922
struct perf_event_header header;
13923
uint32_t size;
13924
char data[];
13925
};
13926
13927
struct perf_sample_lost {
13928
struct perf_event_header header;
13929
uint64_t id;
13930
uint64_t lost;
13931
uint64_t sample_id;
13932
};
13933
13934
static enum bpf_perf_event_ret
13935
perf_buffer__process_record(struct perf_event_header *e, void *ctx)
13936
{
13937
struct perf_cpu_buf *cpu_buf = ctx;
13938
struct perf_buffer *pb = cpu_buf->pb;
13939
void *data = e;
13940
13941
/* user wants full control over parsing perf event */
13942
if (pb->event_cb)
13943
return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
13944
13945
switch (e->type) {
13946
case PERF_RECORD_SAMPLE: {
13947
struct perf_sample_raw *s = data;
13948
13949
if (pb->sample_cb)
13950
pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
13951
break;
13952
}
13953
case PERF_RECORD_LOST: {
13954
struct perf_sample_lost *s = data;
13955
13956
if (pb->lost_cb)
13957
pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
13958
break;
13959
}
13960
default:
13961
pr_warn("unknown perf sample type %d\n", e->type);
13962
return LIBBPF_PERF_EVENT_ERROR;
13963
}
13964
return LIBBPF_PERF_EVENT_CONT;
13965
}
13966
13967
static int perf_buffer__process_records(struct perf_buffer *pb,
13968
struct perf_cpu_buf *cpu_buf)
13969
{
13970
enum bpf_perf_event_ret ret;
13971
13972
ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
13973
pb->page_size, &cpu_buf->buf,
13974
&cpu_buf->buf_size,
13975
perf_buffer__process_record, cpu_buf);
13976
if (ret != LIBBPF_PERF_EVENT_CONT)
13977
return ret;
13978
return 0;
13979
}
13980
13981
int perf_buffer__epoll_fd(const struct perf_buffer *pb)
13982
{
13983
return pb->epoll_fd;
13984
}
13985
13986
int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
13987
{
13988
int i, cnt, err;
13989
13990
cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
13991
if (cnt < 0)
13992
return -errno;
13993
13994
for (i = 0; i < cnt; i++) {
13995
struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
13996
13997
err = perf_buffer__process_records(pb, cpu_buf);
13998
if (err) {
13999
pr_warn("error while processing records: %s\n", errstr(err));
14000
return libbpf_err(err);
14001
}
14002
}
14003
return cnt;
14004
}
14005
14006
/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
14007
* manager.
14008
*/
14009
size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
14010
{
14011
return pb->cpu_cnt;
14012
}
14013
14014
/*
14015
* Return perf_event FD of a ring buffer in *buf_idx* slot of
14016
* PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
14017
* select()/poll()/epoll() Linux syscalls.
14018
*/
14019
int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
14020
{
14021
struct perf_cpu_buf *cpu_buf;
14022
14023
if (buf_idx >= pb->cpu_cnt)
14024
return libbpf_err(-EINVAL);
14025
14026
cpu_buf = pb->cpu_bufs[buf_idx];
14027
if (!cpu_buf)
14028
return libbpf_err(-ENOENT);
14029
14030
return cpu_buf->fd;
14031
}
14032
14033
int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
14034
{
14035
struct perf_cpu_buf *cpu_buf;
14036
14037
if (buf_idx >= pb->cpu_cnt)
14038
return libbpf_err(-EINVAL);
14039
14040
cpu_buf = pb->cpu_bufs[buf_idx];
14041
if (!cpu_buf)
14042
return libbpf_err(-ENOENT);
14043
14044
*buf = cpu_buf->base;
14045
*buf_size = pb->mmap_size;
14046
return 0;
14047
}
14048
14049
/*
14050
* Consume data from perf ring buffer corresponding to slot *buf_idx* in
14051
* PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
14052
* consume, do nothing and return success.
14053
* Returns:
14054
* - 0 on success;
14055
* - <0 on failure.
14056
*/
14057
int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
14058
{
14059
struct perf_cpu_buf *cpu_buf;
14060
14061
if (buf_idx >= pb->cpu_cnt)
14062
return libbpf_err(-EINVAL);
14063
14064
cpu_buf = pb->cpu_bufs[buf_idx];
14065
if (!cpu_buf)
14066
return libbpf_err(-ENOENT);
14067
14068
return perf_buffer__process_records(pb, cpu_buf);
14069
}
14070
14071
int perf_buffer__consume(struct perf_buffer *pb)
14072
{
14073
int i, err;
14074
14075
for (i = 0; i < pb->cpu_cnt; i++) {
14076
struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
14077
14078
if (!cpu_buf)
14079
continue;
14080
14081
err = perf_buffer__process_records(pb, cpu_buf);
14082
if (err) {
14083
pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n",
14084
i, errstr(err));
14085
return libbpf_err(err);
14086
}
14087
}
14088
return 0;
14089
}
14090
14091
int bpf_program__set_attach_target(struct bpf_program *prog,
14092
int attach_prog_fd,
14093
const char *attach_func_name)
14094
{
14095
int btf_obj_fd = 0, btf_id = 0, err;
14096
14097
if (!prog || attach_prog_fd < 0)
14098
return libbpf_err(-EINVAL);
14099
14100
if (prog->obj->state >= OBJ_LOADED)
14101
return libbpf_err(-EINVAL);
14102
14103
if (attach_prog_fd && !attach_func_name) {
14104
/* Store attach_prog_fd. The BTF ID will be resolved later during
14105
* the normal object/program load phase.
14106
*/
14107
prog->attach_prog_fd = attach_prog_fd;
14108
return 0;
14109
}
14110
14111
if (attach_prog_fd) {
14112
btf_id = libbpf_find_prog_btf_id(attach_func_name,
14113
attach_prog_fd, prog->obj->token_fd);
14114
if (btf_id < 0)
14115
return libbpf_err(btf_id);
14116
} else {
14117
if (!attach_func_name)
14118
return libbpf_err(-EINVAL);
14119
14120
/* load btf_vmlinux, if not yet */
14121
err = bpf_object__load_vmlinux_btf(prog->obj, true);
14122
if (err)
14123
return libbpf_err(err);
14124
err = find_kernel_btf_id(prog->obj, attach_func_name,
14125
prog->expected_attach_type,
14126
&btf_obj_fd, &btf_id);
14127
if (err)
14128
return libbpf_err(err);
14129
}
14130
14131
prog->attach_btf_id = btf_id;
14132
prog->attach_btf_obj_fd = btf_obj_fd;
14133
prog->attach_prog_fd = attach_prog_fd;
14134
return 0;
14135
}
14136
14137
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
14138
{
14139
int err = 0, n, len, start, end = -1;
14140
bool *tmp;
14141
14142
*mask = NULL;
14143
*mask_sz = 0;
14144
14145
/* Each substring separated by ',' has format \d+-\d+ or \d+ */
14146
while (*s) {
14147
if (*s == ',' || *s == '\n') {
14148
s++;
14149
continue;
14150
}
14151
n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
14152
if (n <= 0 || n > 2) {
14153
pr_warn("Failed to get CPU range %s: %d\n", s, n);
14154
err = -EINVAL;
14155
goto cleanup;
14156
} else if (n == 1) {
14157
end = start;
14158
}
14159
if (start < 0 || start > end) {
14160
pr_warn("Invalid CPU range [%d,%d] in %s\n",
14161
start, end, s);
14162
err = -EINVAL;
14163
goto cleanup;
14164
}
14165
tmp = realloc(*mask, end + 1);
14166
if (!tmp) {
14167
err = -ENOMEM;
14168
goto cleanup;
14169
}
14170
*mask = tmp;
14171
memset(tmp + *mask_sz, 0, start - *mask_sz);
14172
memset(tmp + start, 1, end - start + 1);
14173
*mask_sz = end + 1;
14174
s += len;
14175
}
14176
if (!*mask_sz) {
14177
pr_warn("Empty CPU range\n");
14178
return -EINVAL;
14179
}
14180
return 0;
14181
cleanup:
14182
free(*mask);
14183
*mask = NULL;
14184
return err;
14185
}
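/* Example (illustrative): the string "0-3,7\n" yields a mask of size 8 with
 * entries 0-3 and 7 set to true.
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-3,7\n", &mask, &n);
 *	if (!err)
 *		free(mask);
 */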
14186
14187
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
14188
{
14189
int fd, err = 0, len;
14190
char buf[128];
14191
14192
fd = open(fcpu, O_RDONLY | O_CLOEXEC);
14193
if (fd < 0) {
14194
err = -errno;
14195
pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err));
14196
return err;
14197
}
14198
len = read(fd, buf, sizeof(buf));
14199
close(fd);
14200
if (len <= 0) {
14201
err = len ? -errno : -EINVAL;
14202
pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err));
14203
return err;
14204
}
14205
if (len >= sizeof(buf)) {
14206
pr_warn("CPU mask is too big in file %s\n", fcpu);
14207
return -E2BIG;
14208
}
14209
buf[len] = '\0';
14210
14211
return parse_cpu_mask_str(buf, mask, mask_sz);
14212
}
14213
14214
int libbpf_num_possible_cpus(void)
14215
{
14216
static const char *fcpu = "/sys/devices/system/cpu/possible";
14217
static int cpus;
14218
int err, n, i, tmp_cpus;
14219
bool *mask;
14220
14221
tmp_cpus = READ_ONCE(cpus);
14222
if (tmp_cpus > 0)
14223
return tmp_cpus;
14224
14225
err = parse_cpu_mask_file(fcpu, &mask, &n);
14226
if (err)
14227
return libbpf_err(err);
14228
14229
tmp_cpus = 0;
14230
for (i = 0; i < n; i++) {
14231
if (mask[i])
14232
tmp_cpus++;
14233
}
14234
free(mask);
14235
14236
WRITE_ONCE(cpus, tmp_cpus);
14237
return tmp_cpus;
14238
}
14239
14240
static int populate_skeleton_maps(const struct bpf_object *obj,
14241
struct bpf_map_skeleton *maps,
14242
size_t map_cnt, size_t map_skel_sz)
14243
{
14244
int i;
14245
14246
for (i = 0; i < map_cnt; i++) {
14247
struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz;
14248
struct bpf_map **map = map_skel->map;
14249
const char *name = map_skel->name;
14250
void **mmaped = map_skel->mmaped;
14251
14252
*map = bpf_object__find_map_by_name(obj, name);
14253
if (!*map) {
14254
pr_warn("failed to find skeleton map '%s'\n", name);
14255
return -ESRCH;
14256
}
14257
14258
/* externs shouldn't be pre-setup from user code */
14259
if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
14260
*mmaped = (*map)->mmaped;
14261
}
14262
return 0;
14263
}
14264
14265
static int populate_skeleton_progs(const struct bpf_object *obj,
14266
struct bpf_prog_skeleton *progs,
14267
size_t prog_cnt, size_t prog_skel_sz)
14268
{
14269
int i;
14270
14271
for (i = 0; i < prog_cnt; i++) {
14272
struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz;
14273
struct bpf_program **prog = prog_skel->prog;
14274
const char *name = prog_skel->name;
14275
14276
*prog = bpf_object__find_program_by_name(obj, name);
14277
if (!*prog) {
14278
pr_warn("failed to find skeleton program '%s'\n", name);
14279
return -ESRCH;
14280
}
14281
}
14282
return 0;
14283
}
14284
14285
int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
14286
const struct bpf_object_open_opts *opts)
14287
{
14288
struct bpf_object *obj;
14289
int err;
14290
14291
obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts);
14292
if (IS_ERR(obj)) {
14293
err = PTR_ERR(obj);
14294
pr_warn("failed to initialize skeleton BPF object '%s': %s\n",
14295
s->name, errstr(err));
14296
return libbpf_err(err);
14297
}
14298
14299
*s->obj = obj;
14300
err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz);
14301
if (err) {
14302
pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err));
14303
return libbpf_err(err);
14304
}
14305
14306
err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz);
14307
if (err) {
14308
pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err));
14309
return libbpf_err(err);
14310
}
14311
14312
return 0;
14313
}
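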
14314
14315
int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
14316
{
14317
int err, len, var_idx, i;
14318
const char *var_name;
14319
const struct bpf_map *map;
14320
struct btf *btf;
14321
__u32 map_type_id;
14322
const struct btf_type *map_type, *var_type;
14323
const struct bpf_var_skeleton *var_skel;
14324
struct btf_var_secinfo *var;
14325
14326
if (!s->obj)
14327
return libbpf_err(-EINVAL);
14328
14329
btf = bpf_object__btf(s->obj);
14330
if (!btf) {
14331
pr_warn("subskeletons require BTF at runtime (object %s)\n",
14332
bpf_object__name(s->obj));
14333
return libbpf_err(-errno);
14334
}
14335
14336
err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz);
14337
if (err) {
14338
pr_warn("failed to populate subskeleton maps: %s\n", errstr(err));
14339
return libbpf_err(err);
14340
}
14341
14342
err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz);
14343
if (err) {
14344
pr_warn("failed to populate subskeleton maps: %s\n", errstr(err));
14345
return libbpf_err(err);
14346
}
14347
14348
for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
14349
var_skel = (void *)s->vars + var_idx * s->var_skel_sz;
14350
map = *var_skel->map;
14351
map_type_id = bpf_map__btf_value_type_id(map);
14352
map_type = btf__type_by_id(btf, map_type_id);
14353
14354
if (!btf_is_datasec(map_type)) {
14355
pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
14356
bpf_map__name(map),
14357
__btf_kind_str(btf_kind(map_type)));
14358
return libbpf_err(-EINVAL);
14359
}
14360
14361
len = btf_vlen(map_type);
14362
var = btf_var_secinfos(map_type);
14363
for (i = 0; i < len; i++, var++) {
14364
var_type = btf__type_by_id(btf, var->type);
14365
var_name = btf__name_by_offset(btf, var_type->name_off);
14366
if (strcmp(var_name, var_skel->name) == 0) {
14367
*var_skel->addr = map->mmaped + var->offset;
14368
break;
14369
}
14370
}
14371
}
14372
return 0;
14373
}
14374
14375
void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
14376
{
14377
if (!s)
14378
return;
14379
free(s->maps);
14380
free(s->progs);
14381
free(s->vars);
14382
free(s);
14383
}
14384
14385
int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
14386
{
14387
int i, err;
14388
14389
err = bpf_object__load(*s->obj);
14390
if (err) {
14391
pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err));
14392
return libbpf_err(err);
14393
}
14394
14395
for (i = 0; i < s->map_cnt; i++) {
14396
struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
14397
struct bpf_map *map = *map_skel->map;
14398
14399
if (!map_skel->mmaped)
14400
continue;
14401
14402
*map_skel->mmaped = map->mmaped;
14403
}
14404
14405
return 0;
14406
}
14407
14408
int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
14409
{
14410
int i, err;
14411
14412
for (i = 0; i < s->prog_cnt; i++) {
14413
struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
14414
struct bpf_program *prog = *prog_skel->prog;
14415
struct bpf_link **link = prog_skel->link;
14416
14417
if (!prog->autoload || !prog->autoattach)
14418
continue;
14419
14420
/* auto-attaching not supported for this program */
14421
if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
14422
continue;
14423
14424
/* if user already set the link manually, don't attempt auto-attach */
14425
if (*link)
14426
continue;
14427
14428
err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
14429
if (err) {
14430
pr_warn("prog '%s': failed to auto-attach: %s\n",
14431
bpf_program__name(prog), errstr(err));
14432
return libbpf_err(err);
14433
}
14434
14435
/* It's possible that for some SEC() definitions auto-attach
14436
* is supported in some cases (e.g., if definition completely
14437
* specifies target information), but is not in other cases.
14438
* SEC("uprobe") is one such case. If user specified target
14439
* binary and function name, such BPF program can be
14440
* auto-attached. But if not, it shouldn't cause the skeleton's
14441
* attach to fail. It should just be skipped.
14442
* attach_fn signals such case with returning 0 (no error) and
14443
* setting link to NULL.
14444
*/
14445
}
14446
14447
14448
for (i = 0; i < s->map_cnt; i++) {
14449
struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
14450
struct bpf_map *map = *map_skel->map;
14451
struct bpf_link **link;
14452
14453
if (!map->autocreate || !map->autoattach)
14454
continue;
14455
14456
/* only struct_ops maps can be attached */
14457
if (!bpf_map__is_struct_ops(map))
14458
continue;
14459
14460
/* skeleton is created with earlier version of bpftool, notify user */
14461
if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) {
14462
pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n",
14463
bpf_map__name(map));
14464
continue;
14465
}
14466
14467
link = map_skel->link;
14468
if (!link) {
14469
pr_warn("map '%s': BPF map skeleton link is uninitialized\n",
14470
bpf_map__name(map));
14471
continue;
14472
}
14473
14474
if (*link)
14475
continue;
14476
14477
*link = bpf_map__attach_struct_ops(map);
14478
if (!*link) {
14479
err = -errno;
14480
pr_warn("map '%s': failed to auto-attach: %s\n",
14481
bpf_map__name(map), errstr(err));
14482
return libbpf_err(err);
14483
}
14484
}
14485
14486
return 0;
14487
}
14488
14489
void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
14490
{
14491
int i;
14492
14493
for (i = 0; i < s->prog_cnt; i++) {
14494
struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
14495
struct bpf_link **link = prog_skel->link;
14496
14497
bpf_link__destroy(*link);
14498
*link = NULL;
14499
}
14500
14501
if (s->map_skel_sz < sizeof(struct bpf_map_skeleton))
14502
return;
14503
14504
for (i = 0; i < s->map_cnt; i++) {
14505
struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
14506
struct bpf_link **link = map_skel->link;
14507
14508
if (link) {
14509
bpf_link__destroy(*link);
14510
*link = NULL;
14511
}
14512
}
14513
}
14514
14515
void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
14516
{
14517
if (!s)
14518
return;
14519
14520
bpf_object__detach_skeleton(s);
14521
if (s->obj)
14522
bpf_object__close(*s->obj);
14523
free(s->maps);
14524
free(s->progs);
14525
free(s);
14526
}
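/* The skeleton helpers above are normally driven by bpftool-generated code
 * rather than called directly. A typical application-side sketch (the
 * "my_obj" skeleton name is hypothetical, produced by "bpftool gen skeleton"):
 *
 *	struct my_obj *skel = my_obj__open();	wraps bpf_object__open_skeleton()
 *
 *	my_obj__load(skel);			wraps bpf_object__load_skeleton()
 *	my_obj__attach(skel);			wraps bpf_object__attach_skeleton()
 *	...
 *	my_obj__destroy(skel);			wraps bpf_object__destroy_skeleton()
 */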
14527
14528