GitHub Repository: torvalds/linux
Path: blob/master/kernel/bpf/core.c
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* Linux Socket Filter - Kernel level socket filtering
4
*
5
* Based on the design of the Berkeley Packet Filter. The new
6
* internal format has been designed by PLUMgrid:
7
*
8
* Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
9
*
10
* Authors:
11
*
12
* Jay Schulist <[email protected]>
13
* Alexei Starovoitov <[email protected]>
14
* Daniel Borkmann <[email protected]>
15
*
16
* Andi Kleen - Fix a few bad bugs and races.
17
* Kris Katterjohn - Added many additional checks in bpf_check_classic()
18
*/
19
20
#include <uapi/linux/btf.h>
21
#include <crypto/sha1.h>
22
#include <linux/filter.h>
23
#include <linux/skbuff.h>
24
#include <linux/vmalloc.h>
25
#include <linux/prandom.h>
26
#include <linux/bpf.h>
27
#include <linux/btf.h>
28
#include <linux/objtool.h>
29
#include <linux/overflow.h>
30
#include <linux/rbtree_latch.h>
31
#include <linux/kallsyms.h>
32
#include <linux/rcupdate.h>
33
#include <linux/perf_event.h>
34
#include <linux/extable.h>
35
#include <linux/log2.h>
36
#include <linux/bpf_verifier.h>
37
#include <linux/nodemask.h>
38
#include <linux/nospec.h>
39
#include <linux/bpf_mem_alloc.h>
40
#include <linux/memcontrol.h>
41
#include <linux/execmem.h>
42
#include <crypto/sha2.h>
43
44
#include <asm/barrier.h>
45
#include <linux/unaligned.h>
46
47
/* Registers */
48
#define BPF_R0 regs[BPF_REG_0]
49
#define BPF_R1 regs[BPF_REG_1]
50
#define BPF_R2 regs[BPF_REG_2]
51
#define BPF_R3 regs[BPF_REG_3]
52
#define BPF_R4 regs[BPF_REG_4]
53
#define BPF_R5 regs[BPF_REG_5]
54
#define BPF_R6 regs[BPF_REG_6]
55
#define BPF_R7 regs[BPF_REG_7]
56
#define BPF_R8 regs[BPF_REG_8]
57
#define BPF_R9 regs[BPF_REG_9]
58
#define BPF_R10 regs[BPF_REG_10]
59
60
/* Named registers */
61
#define DST regs[insn->dst_reg]
62
#define SRC regs[insn->src_reg]
63
#define FP regs[BPF_REG_FP]
64
#define AX regs[BPF_REG_AX]
65
#define ARG1 regs[BPF_REG_ARG1]
66
#define CTX regs[BPF_REG_CTX]
67
#define OFF insn->off
68
#define IMM insn->imm
69
70
struct bpf_mem_alloc bpf_global_ma;
71
bool bpf_global_ma_set;
72
73
/* No hurry in this branch
74
*
75
* Exported for the bpf jit load helper.
76
*/
77
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
78
{
79
u8 *ptr = NULL;
80
81
if (k >= SKF_NET_OFF) {
82
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
83
} else if (k >= SKF_LL_OFF) {
84
if (unlikely(!skb_mac_header_was_set(skb)))
85
return NULL;
86
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
87
}
88
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
89
return ptr;
90
91
return NULL;
92
}
93
94
/* tell bpf programs that include vmlinux.h the kernel's PAGE_SIZE */
95
enum page_size_enum {
96
__PAGE_SIZE = PAGE_SIZE
97
};
98
99
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
100
{
101
gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
102
struct bpf_prog_aux *aux;
103
struct bpf_prog *fp;
104
105
size = round_up(size, __PAGE_SIZE);
106
fp = __vmalloc(size, gfp_flags);
107
if (fp == NULL)
108
return NULL;
109
110
aux = kzalloc(sizeof(*aux), bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
111
if (aux == NULL) {
112
vfree(fp);
113
return NULL;
114
}
115
fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
116
if (!fp->active) {
117
vfree(fp);
118
kfree(aux);
119
return NULL;
120
}
121
122
fp->pages = size / PAGE_SIZE;
123
fp->aux = aux;
124
fp->aux->main_prog_aux = aux;
125
fp->aux->prog = fp;
126
fp->jit_requested = ebpf_jit_enabled();
127
fp->blinding_requested = bpf_jit_blinding_enabled(fp);
128
#ifdef CONFIG_CGROUP_BPF
129
aux->cgroup_atype = CGROUP_BPF_ATTACH_TYPE_INVALID;
130
#endif
131
132
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
133
#ifdef CONFIG_FINEIBT
134
INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
135
#endif
136
mutex_init(&fp->aux->used_maps_mutex);
137
mutex_init(&fp->aux->ext_mutex);
138
mutex_init(&fp->aux->dst_mutex);
139
140
#ifdef CONFIG_BPF_SYSCALL
141
bpf_prog_stream_init(fp);
142
#endif
143
144
return fp;
145
}
146
147
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
148
{
149
gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
150
struct bpf_prog *prog;
151
int cpu;
152
153
prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
154
if (!prog)
155
return NULL;
156
157
prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
158
if (!prog->stats) {
159
free_percpu(prog->active);
160
kfree(prog->aux);
161
vfree(prog);
162
return NULL;
163
}
164
165
for_each_possible_cpu(cpu) {
166
struct bpf_prog_stats *pstats;
167
168
pstats = per_cpu_ptr(prog->stats, cpu);
169
u64_stats_init(&pstats->syncp);
170
}
171
return prog;
172
}
173
EXPORT_SYMBOL_GPL(bpf_prog_alloc);
174
175
int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
176
{
177
if (!prog->aux->nr_linfo || !prog->jit_requested)
178
return 0;
179
180
prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
181
sizeof(*prog->aux->jited_linfo),
182
bpf_memcg_flags(GFP_KERNEL | __GFP_NOWARN));
183
if (!prog->aux->jited_linfo)
184
return -ENOMEM;
185
186
return 0;
187
}
188
189
void bpf_prog_jit_attempt_done(struct bpf_prog *prog)
190
{
191
if (prog->aux->jited_linfo &&
192
(!prog->jited || !prog->aux->jited_linfo[0])) {
193
kvfree(prog->aux->jited_linfo);
194
prog->aux->jited_linfo = NULL;
195
}
196
197
kfree(prog->aux->kfunc_tab);
198
prog->aux->kfunc_tab = NULL;
199
}
200
201
/* The jit engine is responsible to provide an array
202
* for insn_off to the jited_off mapping (insn_to_jit_off).
203
*
204
* The idx to this array is the insn_off. Hence, the insn_off
205
* here is relative to the prog itself instead of the main prog.
206
* This array has one entry for each xlated bpf insn.
207
*
208
* jited_off is the byte off to the end of the jited insn.
209
*
210
* Hence, with
211
* insn_start:
212
* The first bpf insn off of the prog. The insn off
213
* here is relative to the main prog.
214
* e.g. if prog is a subprog, insn_start > 0
215
* linfo_idx:
216
* The prog's idx to prog->aux->linfo and jited_linfo
217
*
218
* jited_linfo[linfo_idx] = prog->bpf_func
219
*
220
* For i > linfo_idx,
221
*
222
* jited_linfo[i] = prog->bpf_func +
223
* insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
224
*/
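/* Editorial note: a worked example of the mapping above (numbers are
 * illustrative only, not taken from a real program). Suppose a subprog
 * starts at main-prog insn 10 (insn_start = 10) and linfo[i].insn_off = 12
 * for some i > linfo_idx. Then
 *   jited_linfo[i] = prog->bpf_func + insn_to_jit_off[12 - 10 - 1],
 * i.e. the jited address right after the jited code of the subprog's
 * instruction at prog-relative offset 1 (main-prog insn 11).
 */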
225
void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
226
const u32 *insn_to_jit_off)
227
{
228
u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
229
const struct bpf_line_info *linfo;
230
void **jited_linfo;
231
232
if (!prog->aux->jited_linfo || prog->aux->func_idx > prog->aux->func_cnt)
233
/* Userspace did not provide linfo */
234
return;
235
236
linfo_idx = prog->aux->linfo_idx;
237
linfo = &prog->aux->linfo[linfo_idx];
238
insn_start = linfo[0].insn_off;
239
insn_end = insn_start + prog->len;
240
241
jited_linfo = &prog->aux->jited_linfo[linfo_idx];
242
jited_linfo[0] = prog->bpf_func;
243
244
nr_linfo = prog->aux->nr_linfo - linfo_idx;
245
246
for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
247
/* The verifier ensures that linfo[i].insn_off is
248
* strictly increasing
249
*/
250
jited_linfo[i] = prog->bpf_func +
251
insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
252
}
253
254
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
255
gfp_t gfp_extra_flags)
256
{
257
gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
258
struct bpf_prog *fp;
259
u32 pages;
260
261
size = round_up(size, PAGE_SIZE);
262
pages = size / PAGE_SIZE;
263
if (pages <= fp_old->pages)
264
return fp_old;
265
266
fp = __vmalloc(size, gfp_flags);
267
if (fp) {
268
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
269
fp->pages = pages;
270
fp->aux->prog = fp;
271
272
/* We keep fp->aux from fp_old around in the new
273
* reallocated structure.
274
*/
275
fp_old->aux = NULL;
276
fp_old->stats = NULL;
277
fp_old->active = NULL;
278
__bpf_prog_free(fp_old);
279
}
280
281
return fp;
282
}
283
284
void __bpf_prog_free(struct bpf_prog *fp)
285
{
286
if (fp->aux) {
287
mutex_destroy(&fp->aux->used_maps_mutex);
288
mutex_destroy(&fp->aux->dst_mutex);
289
kfree(fp->aux->poke_tab);
290
kfree(fp->aux);
291
}
292
free_percpu(fp->stats);
293
free_percpu(fp->active);
294
vfree(fp);
295
}
296
297
int bpf_prog_calc_tag(struct bpf_prog *fp)
298
{
299
size_t size = bpf_prog_insn_size(fp);
300
struct bpf_insn *dst;
301
bool was_ld_map;
302
u32 i;
303
304
dst = vmalloc(size);
305
if (!dst)
306
return -ENOMEM;
307
308
/* We need to take out the map fd for the digest calculation
309
* since they are unstable from user space side.
310
*/
311
for (i = 0, was_ld_map = false; i < fp->len; i++) {
312
dst[i] = fp->insnsi[i];
313
if (!was_ld_map &&
314
dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
315
(dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
316
dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
317
was_ld_map = true;
318
dst[i].imm = 0;
319
} else if (was_ld_map &&
320
dst[i].code == 0 &&
321
dst[i].dst_reg == 0 &&
322
dst[i].src_reg == 0 &&
323
dst[i].off == 0) {
324
was_ld_map = false;
325
dst[i].imm = 0;
326
} else {
327
was_ld_map = false;
328
}
329
}
330
sha256((u8 *)dst, size, fp->digest);
331
vfree(dst);
332
return 0;
333
}
334
335
static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
336
s32 end_new, s32 curr, const bool probe_pass)
337
{
338
const s64 imm_min = S32_MIN, imm_max = S32_MAX;
339
s32 delta = end_new - end_old;
340
s64 imm = insn->imm;
341
342
if (curr < pos && curr + imm + 1 >= end_old)
343
imm += delta;
344
else if (curr >= end_new && curr + imm + 1 < end_new)
345
imm -= delta;
346
if (imm < imm_min || imm > imm_max)
347
return -ERANGE;
348
if (!probe_pass)
349
insn->imm = imm;
350
return 0;
351
}
352
353
static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
354
s32 end_new, s32 curr, const bool probe_pass)
355
{
356
s64 off_min, off_max, off;
357
s32 delta = end_new - end_old;
358
359
if (insn->code == (BPF_JMP32 | BPF_JA)) {
360
off = insn->imm;
361
off_min = S32_MIN;
362
off_max = S32_MAX;
363
} else {
364
off = insn->off;
365
off_min = S16_MIN;
366
off_max = S16_MAX;
367
}
368
369
if (curr < pos && curr + off + 1 >= end_old)
370
off += delta;
371
else if (curr >= end_new && curr + off + 1 < end_new)
372
off -= delta;
373
if (off < off_min || off > off_max)
374
return -ERANGE;
375
if (!probe_pass) {
376
if (insn->code == (BPF_JMP32 | BPF_JA))
377
insn->imm = off;
378
else
379
insn->off = off;
380
}
381
return 0;
382
}
383
384
static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
385
s32 end_new, const bool probe_pass)
386
{
387
u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0);
388
struct bpf_insn *insn = prog->insnsi;
389
int ret = 0;
390
391
for (i = 0; i < insn_cnt; i++, insn++) {
392
u8 code;
393
394
/* In the probing pass we still operate on the original,
395
* unpatched image in order to check overflows before we
396
* do any other adjustments. Therefore skip the patchlet.
397
*/
398
if (probe_pass && i == pos) {
399
i = end_new;
400
insn = prog->insnsi + end_old;
401
}
402
if (bpf_pseudo_func(insn)) {
403
ret = bpf_adj_delta_to_imm(insn, pos, end_old,
404
end_new, i, probe_pass);
405
if (ret)
406
return ret;
407
continue;
408
}
409
code = insn->code;
410
if ((BPF_CLASS(code) != BPF_JMP &&
411
BPF_CLASS(code) != BPF_JMP32) ||
412
BPF_OP(code) == BPF_EXIT)
413
continue;
414
/* Adjust offset of jmps if we cross patch boundaries. */
415
if (BPF_OP(code) == BPF_CALL) {
416
if (insn->src_reg != BPF_PSEUDO_CALL)
417
continue;
418
ret = bpf_adj_delta_to_imm(insn, pos, end_old,
419
end_new, i, probe_pass);
420
} else {
421
ret = bpf_adj_delta_to_off(insn, pos, end_old,
422
end_new, i, probe_pass);
423
}
424
if (ret)
425
break;
426
}
427
428
return ret;
429
}
430
431
static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
432
{
433
struct bpf_line_info *linfo;
434
u32 i, nr_linfo;
435
436
nr_linfo = prog->aux->nr_linfo;
437
if (!nr_linfo || !delta)
438
return;
439
440
linfo = prog->aux->linfo;
441
442
for (i = 0; i < nr_linfo; i++)
443
if (off < linfo[i].insn_off)
444
break;
445
446
/* Push all off < linfo[i].insn_off by delta */
447
for (; i < nr_linfo; i++)
448
linfo[i].insn_off += delta;
449
}
450
451
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
452
const struct bpf_insn *patch, u32 len)
453
{
454
u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
455
const u32 cnt_max = S16_MAX;
456
struct bpf_prog *prog_adj;
457
int err;
458
459
/* Since our patchlet doesn't expand the image, we're done. */
460
if (insn_delta == 0) {
461
memcpy(prog->insnsi + off, patch, sizeof(*patch));
462
return prog;
463
}
464
465
insn_adj_cnt = prog->len + insn_delta;
466
467
/* Reject anything that would potentially let the insn->off
468
* target overflow when we have excessive program expansions.
469
* We need to probe here before we do any reallocation where
470
* we afterwards may not fail anymore.
471
*/
472
if (insn_adj_cnt > cnt_max &&
473
(err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
474
return ERR_PTR(err);
475
476
/* Several new instructions need to be inserted. Make room
477
* for them. Likely, there's no need for a new allocation as
478
* last page could have large enough tailroom.
479
*/
480
prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
481
GFP_USER);
482
if (!prog_adj)
483
return ERR_PTR(-ENOMEM);
484
485
prog_adj->len = insn_adj_cnt;
486
487
/* Patching happens in 3 steps:
488
*
489
* 1) Move over tail of insnsi from next instruction onwards,
490
* so we can patch the single target insn with one or more
491
* new ones (patching is always from 1 to n insns, n > 0).
492
* 2) Inject new instructions at the target location.
493
* 3) Adjust branch offsets if necessary.
494
*/
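/* Editorial note, an illustrative example of the three steps above:
 * with off = 5 and a patch of len = 3, insn_delta = 2. The tail that
 * used to start at insn 6 is moved to start at insn 8, the three patch
 * insns are copied into slots 5..7, and any jump crossing the patched
 * region is widened by delta = 2 in bpf_adj_branches().
 */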
495
insn_rest = insn_adj_cnt - off - len;
496
497
memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1,
498
sizeof(*patch) * insn_rest);
499
memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
500
501
/* We are guaranteed to not fail at this point, otherwise
502
* the ship has sailed to reverse to the original state. An
503
* overflow cannot happen at this point.
504
*/
505
BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));
506
507
bpf_adj_linfo(prog_adj, off, insn_delta);
508
509
return prog_adj;
510
}
511
512
int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
513
{
514
int err;
515
516
/* Branch offsets can't overflow when program is shrinking, no need
517
* to call bpf_adj_branches(..., true) here
518
*/
519
memmove(prog->insnsi + off, prog->insnsi + off + cnt,
520
sizeof(struct bpf_insn) * (prog->len - off - cnt));
521
prog->len -= cnt;
522
523
err = bpf_adj_branches(prog, off, off + cnt, off, false);
524
WARN_ON_ONCE(err);
525
return err;
526
}
527
528
static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
529
{
530
int i;
531
532
for (i = 0; i < fp->aux->real_func_cnt; i++)
533
bpf_prog_kallsyms_del(fp->aux->func[i]);
534
}
535
536
void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
537
{
538
bpf_prog_kallsyms_del_subprogs(fp);
539
bpf_prog_kallsyms_del(fp);
540
}
541
542
#ifdef CONFIG_BPF_JIT
543
/* All BPF JIT sysctl knobs here. */
544
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
545
int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
546
int bpf_jit_harden __read_mostly;
547
long bpf_jit_limit __read_mostly;
548
long bpf_jit_limit_max __read_mostly;
549
550
static void
551
bpf_prog_ksym_set_addr(struct bpf_prog *prog)
552
{
553
WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
554
555
prog->aux->ksym.start = (unsigned long) prog->bpf_func;
556
prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len;
557
}
558
559
static void
560
bpf_prog_ksym_set_name(struct bpf_prog *prog)
561
{
562
char *sym = prog->aux->ksym.name;
563
const char *end = sym + KSYM_NAME_LEN;
564
const struct btf_type *type;
565
const char *func_name;
566
567
BUILD_BUG_ON(sizeof("bpf_prog_") +
568
sizeof(prog->tag) * 2 +
569
/* name has been null terminated.
570
* We would need +1 for the '_' preceding
571
* the name. However, the null character
572
* is double counted between the name and the
573
* sizeof("bpf_prog_") above, so we omit
574
* the +1 here.
575
*/
576
sizeof(prog->aux->name) > KSYM_NAME_LEN);
577
578
sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
579
sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
580
581
/* prog->aux->name will be ignored if full btf name is available */
582
if (prog->aux->func_info_cnt && prog->aux->func_idx < prog->aux->func_info_cnt) {
583
type = btf_type_by_id(prog->aux->btf,
584
prog->aux->func_info[prog->aux->func_idx].type_id);
585
func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
586
snprintf(sym, (size_t)(end - sym), "_%s", func_name);
587
return;
588
}
589
590
if (prog->aux->name[0])
591
snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
592
else
593
*sym = 0;
594
}
595
596
static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
597
{
598
return container_of(n, struct bpf_ksym, tnode)->start;
599
}
600
601
static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
602
struct latch_tree_node *b)
603
{
604
return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
605
}
606
607
static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
608
{
609
unsigned long val = (unsigned long)key;
610
const struct bpf_ksym *ksym;
611
612
ksym = container_of(n, struct bpf_ksym, tnode);
613
614
if (val < ksym->start)
615
return -1;
616
/* Ensure that we detect return addresses as part of the program, when
617
* the final instruction is a call for a program part of the stack
618
* trace. Therefore, do val > ksym->end instead of val >= ksym->end.
619
*/
620
if (val > ksym->end)
621
return 1;
622
623
return 0;
624
}
625
626
static const struct latch_tree_ops bpf_tree_ops = {
627
.less = bpf_tree_less,
628
.comp = bpf_tree_comp,
629
};
630
631
static DEFINE_SPINLOCK(bpf_lock);
632
static LIST_HEAD(bpf_kallsyms);
633
static struct latch_tree_root bpf_tree __cacheline_aligned;
634
635
void bpf_ksym_add(struct bpf_ksym *ksym)
636
{
637
spin_lock_bh(&bpf_lock);
638
WARN_ON_ONCE(!list_empty(&ksym->lnode));
639
list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
640
latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
641
spin_unlock_bh(&bpf_lock);
642
}
643
644
static void __bpf_ksym_del(struct bpf_ksym *ksym)
645
{
646
if (list_empty(&ksym->lnode))
647
return;
648
649
latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
650
list_del_rcu(&ksym->lnode);
651
}
652
653
void bpf_ksym_del(struct bpf_ksym *ksym)
654
{
655
spin_lock_bh(&bpf_lock);
656
__bpf_ksym_del(ksym);
657
spin_unlock_bh(&bpf_lock);
658
}
659
660
static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
661
{
662
return fp->jited && !bpf_prog_was_classic(fp);
663
}
664
665
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
666
{
667
if (!bpf_prog_kallsyms_candidate(fp) ||
668
!bpf_token_capable(fp->aux->token, CAP_BPF))
669
return;
670
671
bpf_prog_ksym_set_addr(fp);
672
bpf_prog_ksym_set_name(fp);
673
fp->aux->ksym.prog = true;
674
675
bpf_ksym_add(&fp->aux->ksym);
676
677
#ifdef CONFIG_FINEIBT
678
/*
679
* When FineIBT, code in the __cfi_foo() symbols can get executed
680
* and hence unwinder needs help.
681
*/
682
if (cfi_mode != CFI_FINEIBT)
683
return;
684
685
snprintf(fp->aux->ksym_prefix.name, KSYM_NAME_LEN,
686
"__cfi_%s", fp->aux->ksym.name);
687
688
fp->aux->ksym_prefix.start = (unsigned long) fp->bpf_func - 16;
689
fp->aux->ksym_prefix.end = (unsigned long) fp->bpf_func;
690
691
bpf_ksym_add(&fp->aux->ksym_prefix);
692
#endif
693
}
694
695
void bpf_prog_kallsyms_del(struct bpf_prog *fp)
696
{
697
if (!bpf_prog_kallsyms_candidate(fp))
698
return;
699
700
bpf_ksym_del(&fp->aux->ksym);
701
#ifdef CONFIG_FINEIBT
702
if (cfi_mode != CFI_FINEIBT)
703
return;
704
bpf_ksym_del(&fp->aux->ksym_prefix);
705
#endif
706
}
707
708
static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
709
{
710
struct latch_tree_node *n;
711
712
n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
713
return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
714
}
715
716
int __bpf_address_lookup(unsigned long addr, unsigned long *size,
717
unsigned long *off, char *sym)
718
{
719
struct bpf_ksym *ksym;
720
int ret = 0;
721
722
rcu_read_lock();
723
ksym = bpf_ksym_find(addr);
724
if (ksym) {
725
unsigned long symbol_start = ksym->start;
726
unsigned long symbol_end = ksym->end;
727
728
ret = strscpy(sym, ksym->name, KSYM_NAME_LEN);
729
730
if (size)
731
*size = symbol_end - symbol_start;
732
if (off)
733
*off = addr - symbol_start;
734
}
735
rcu_read_unlock();
736
737
return ret;
738
}
739
740
bool is_bpf_text_address(unsigned long addr)
741
{
742
bool ret;
743
744
rcu_read_lock();
745
ret = bpf_ksym_find(addr) != NULL;
746
rcu_read_unlock();
747
748
return ret;
749
}
750
751
struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
752
{
753
struct bpf_ksym *ksym;
754
755
WARN_ON_ONCE(!rcu_read_lock_held());
756
ksym = bpf_ksym_find(addr);
757
758
return ksym && ksym->prog ?
759
container_of(ksym, struct bpf_prog_aux, ksym)->prog :
760
NULL;
761
}
762
763
bool bpf_has_frame_pointer(unsigned long ip)
764
{
765
struct bpf_ksym *ksym;
766
unsigned long offset;
767
768
guard(rcu)();
769
770
ksym = bpf_ksym_find(ip);
771
if (!ksym || !ksym->fp_start || !ksym->fp_end)
772
return false;
773
774
offset = ip - ksym->start;
775
776
return offset >= ksym->fp_start && offset < ksym->fp_end;
777
}
778
779
const struct exception_table_entry *search_bpf_extables(unsigned long addr)
780
{
781
const struct exception_table_entry *e = NULL;
782
struct bpf_prog *prog;
783
784
rcu_read_lock();
785
prog = bpf_prog_ksym_find(addr);
786
if (!prog)
787
goto out;
788
if (!prog->aux->num_exentries)
789
goto out;
790
791
e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr);
792
out:
793
rcu_read_unlock();
794
return e;
795
}
796
797
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
798
char *sym)
799
{
800
struct bpf_ksym *ksym;
801
unsigned int it = 0;
802
int ret = -ERANGE;
803
804
if (!bpf_jit_kallsyms_enabled())
805
return ret;
806
807
rcu_read_lock();
808
list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
809
if (it++ != symnum)
810
continue;
811
812
strscpy(sym, ksym->name, KSYM_NAME_LEN);
813
814
*value = ksym->start;
815
*type = BPF_SYM_ELF_TYPE;
816
817
ret = 0;
818
break;
819
}
820
rcu_read_unlock();
821
822
return ret;
823
}
824
825
int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
826
struct bpf_jit_poke_descriptor *poke)
827
{
828
struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
829
static const u32 poke_tab_max = 1024;
830
u32 slot = prog->aux->size_poke_tab;
831
u32 size = slot + 1;
832
833
if (size > poke_tab_max)
834
return -ENOSPC;
835
if (poke->tailcall_target || poke->tailcall_target_stable ||
836
poke->tailcall_bypass || poke->adj_off || poke->bypass_addr)
837
return -EINVAL;
838
839
switch (poke->reason) {
840
case BPF_POKE_REASON_TAIL_CALL:
841
if (!poke->tail_call.map)
842
return -EINVAL;
843
break;
844
default:
845
return -EINVAL;
846
}
847
848
tab = krealloc_array(tab, size, sizeof(*poke), GFP_KERNEL);
849
if (!tab)
850
return -ENOMEM;
851
852
memcpy(&tab[slot], poke, sizeof(*poke));
853
prog->aux->size_poke_tab = size;
854
prog->aux->poke_tab = tab;
855
856
return slot;
857
}
858
859
/*
860
* BPF program pack allocator.
861
*
862
* Most BPF programs are pretty small. Allocating a whole page for each
863
* program is sometimes a waste. Many small BPF programs also add pressure
864
* to the instruction TLB. To solve this issue, we introduce a BPF program pack
865
* allocator. The prog_pack allocator uses HPAGE_PMD_SIZE page (2MB on x86)
866
* to host BPF programs.
867
*/
868
#define BPF_PROG_CHUNK_SHIFT 6
869
#define BPF_PROG_CHUNK_SIZE (1 << BPF_PROG_CHUNK_SHIFT)
870
#define BPF_PROG_CHUNK_MASK (~(BPF_PROG_CHUNK_SIZE - 1))
871
872
struct bpf_prog_pack {
873
struct list_head list;
874
void *ptr;
875
unsigned long bitmap[];
876
};
877
878
void bpf_jit_fill_hole_with_zero(void *area, unsigned int size)
879
{
880
memset(area, 0, size);
881
}
882
883
#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
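/* Editorial note, an illustrative example of the chunk accounting:
 * with BPF_PROG_CHUNK_SIZE = 64 bytes, a 100-byte image needs
 * BPF_PROG_SIZE_TO_NBITS(100) = round_up(100, 64) / 64 = 2 bitmap bits,
 * i.e. two contiguous 64-byte chunks inside a pack.
 */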
884
885
static DEFINE_MUTEX(pack_mutex);
886
static LIST_HEAD(pack_list);
887
888
/* PMD_SIZE is not available in some special config, e.g. ARCH=arm with
889
* CONFIG_MMU=n. Use PAGE_SIZE in these cases.
890
*/
891
#ifdef PMD_SIZE
892
/* PMD_SIZE is really big for some archs. It doesn't make sense to
893
* reserve too much memory in one allocation. Hardcode BPF_PROG_PACK_SIZE to
894
* 2MiB * num_possible_nodes(). On most architectures PMD_SIZE will be
895
* greater than or equal to 2MB.
896
*/
897
#define BPF_PROG_PACK_SIZE (SZ_2M * num_possible_nodes())
898
#else
899
#define BPF_PROG_PACK_SIZE PAGE_SIZE
900
#endif
901
902
#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE)
903
904
static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns)
905
{
906
struct bpf_prog_pack *pack;
907
int err;
908
909
pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)),
910
GFP_KERNEL);
911
if (!pack)
912
return NULL;
913
pack->ptr = bpf_jit_alloc_exec(BPF_PROG_PACK_SIZE);
914
if (!pack->ptr)
915
goto out;
916
bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE);
917
bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
918
919
set_vm_flush_reset_perms(pack->ptr);
920
err = set_memory_rox((unsigned long)pack->ptr,
921
BPF_PROG_PACK_SIZE / PAGE_SIZE);
922
if (err)
923
goto out;
924
list_add_tail(&pack->list, &pack_list);
925
return pack;
926
927
out:
928
bpf_jit_free_exec(pack->ptr);
929
kfree(pack);
930
return NULL;
931
}
932
933
void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
934
{
935
unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
936
struct bpf_prog_pack *pack;
937
unsigned long pos;
938
void *ptr = NULL;
939
940
mutex_lock(&pack_mutex);
941
if (size > BPF_PROG_PACK_SIZE) {
942
size = round_up(size, PAGE_SIZE);
943
ptr = bpf_jit_alloc_exec(size);
944
if (ptr) {
945
int err;
946
947
bpf_fill_ill_insns(ptr, size);
948
set_vm_flush_reset_perms(ptr);
949
err = set_memory_rox((unsigned long)ptr,
950
size / PAGE_SIZE);
951
if (err) {
952
bpf_jit_free_exec(ptr);
953
ptr = NULL;
954
}
955
}
956
goto out;
957
}
958
list_for_each_entry(pack, &pack_list, list) {
959
pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
960
nbits, 0);
961
if (pos < BPF_PROG_CHUNK_COUNT)
962
goto found_free_area;
963
}
964
965
pack = alloc_new_pack(bpf_fill_ill_insns);
966
if (!pack)
967
goto out;
968
969
pos = 0;
970
971
found_free_area:
972
bitmap_set(pack->bitmap, pos, nbits);
973
ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT);
974
975
out:
976
mutex_unlock(&pack_mutex);
977
return ptr;
978
}
979
980
void bpf_prog_pack_free(void *ptr, u32 size)
981
{
982
struct bpf_prog_pack *pack = NULL, *tmp;
983
unsigned int nbits;
984
unsigned long pos;
985
986
mutex_lock(&pack_mutex);
987
if (size > BPF_PROG_PACK_SIZE) {
988
bpf_jit_free_exec(ptr);
989
goto out;
990
}
991
992
list_for_each_entry(tmp, &pack_list, list) {
993
if (ptr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > ptr) {
994
pack = tmp;
995
break;
996
}
997
}
998
999
if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
1000
goto out;
1001
1002
nbits = BPF_PROG_SIZE_TO_NBITS(size);
1003
pos = ((unsigned long)ptr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
1004
1005
WARN_ONCE(bpf_arch_text_invalidate(ptr, size),
1006
"bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
1007
1008
bitmap_clear(pack->bitmap, pos, nbits);
1009
if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
1010
BPF_PROG_CHUNK_COUNT, 0) == 0) {
1011
list_del(&pack->list);
1012
bpf_jit_free_exec(pack->ptr);
1013
kfree(pack);
1014
}
1015
out:
1016
mutex_unlock(&pack_mutex);
1017
}
1018
1019
static atomic_long_t bpf_jit_current;
1020
1021
/* Can be overridden by an arch's JIT compiler if it has a custom,
1022
* dedicated BPF backend memory area, or if neither of the two
1023
* below apply.
1024
*/
1025
u64 __weak bpf_jit_alloc_exec_limit(void)
1026
{
1027
#if defined(MODULES_VADDR)
1028
return MODULES_END - MODULES_VADDR;
1029
#else
1030
return VMALLOC_END - VMALLOC_START;
1031
#endif
1032
}
1033
1034
static int __init bpf_jit_charge_init(void)
1035
{
1036
/* Only used as heuristic here to derive limit. */
1037
bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
1038
bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 1,
1039
PAGE_SIZE), LONG_MAX);
1040
return 0;
1041
}
1042
pure_initcall(bpf_jit_charge_init);
1043
1044
int bpf_jit_charge_modmem(u32 size)
1045
{
1046
if (atomic_long_add_return(size, &bpf_jit_current) > READ_ONCE(bpf_jit_limit)) {
1047
if (!bpf_capable()) {
1048
atomic_long_sub(size, &bpf_jit_current);
1049
return -EPERM;
1050
}
1051
}
1052
1053
return 0;
1054
}
1055
1056
void bpf_jit_uncharge_modmem(u32 size)
1057
{
1058
atomic_long_sub(size, &bpf_jit_current);
1059
}
1060
1061
void *__weak bpf_jit_alloc_exec(unsigned long size)
1062
{
1063
return execmem_alloc(EXECMEM_BPF, size);
1064
}
1065
1066
void __weak bpf_jit_free_exec(void *addr)
1067
{
1068
execmem_free(addr);
1069
}
1070
1071
struct bpf_binary_header *
1072
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
1073
unsigned int alignment,
1074
bpf_jit_fill_hole_t bpf_fill_ill_insns)
1075
{
1076
struct bpf_binary_header *hdr;
1077
u32 size, hole, start;
1078
1079
WARN_ON_ONCE(!is_power_of_2(alignment) ||
1080
alignment > BPF_IMAGE_ALIGNMENT);
1081
1082
/* Most of BPF filters are really small, but if some of them
1083
* fill a page, allow at least 128 extra bytes to insert a
1084
* random section of illegal instructions.
1085
*/
1086
size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
1087
1088
if (bpf_jit_charge_modmem(size))
1089
return NULL;
1090
hdr = bpf_jit_alloc_exec(size);
1091
if (!hdr) {
1092
bpf_jit_uncharge_modmem(size);
1093
return NULL;
1094
}
1095
1096
/* Fill space with illegal/arch-dep instructions. */
1097
bpf_fill_ill_insns(hdr, size);
1098
1099
hdr->size = size;
1100
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
1101
PAGE_SIZE - sizeof(*hdr));
1102
start = get_random_u32_below(hole) & ~(alignment - 1);
1103
1104
/* Leave a random number of instructions before BPF code. */
1105
*image_ptr = &hdr->image[start];
1106
1107
return hdr;
1108
}
1109
1110
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
1111
{
1112
u32 size = hdr->size;
1113
1114
bpf_jit_free_exec(hdr);
1115
bpf_jit_uncharge_modmem(size);
1116
}
1117
1118
/* Allocate jit binary from bpf_prog_pack allocator.
1119
* Since the allocated memory is RO+X, the JIT engine cannot write directly
1120
* to the memory. To solve this problem, a RW buffer is also allocated at
1121
* the same time. The JIT engine should calculate offsets based on the
1122
* RO memory address, but write the JITed program to the RW buffer. Once the
1123
* JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
1124
* the JITed program to the RO memory.
1125
*/
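/* Editorial note: the intended call sequence for a JIT backend, sketched
 * from the comments in this file rather than stated as a prescriptive API
 * contract: bpf_jit_binary_pack_alloc() -> emit code into *rw_image ->
 * bpf_jit_binary_pack_finalize() on success, or
 * bpf_jit_binary_pack_free() if the JIT attempt fails.
 */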
1126
struct bpf_binary_header *
1127
bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
1128
unsigned int alignment,
1129
struct bpf_binary_header **rw_header,
1130
u8 **rw_image,
1131
bpf_jit_fill_hole_t bpf_fill_ill_insns)
1132
{
1133
struct bpf_binary_header *ro_header;
1134
u32 size, hole, start;
1135
1136
WARN_ON_ONCE(!is_power_of_2(alignment) ||
1137
alignment > BPF_IMAGE_ALIGNMENT);
1138
1139
/* add 16 bytes for a random section of illegal instructions */
1140
size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);
1141
1142
if (bpf_jit_charge_modmem(size))
1143
return NULL;
1144
ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
1145
if (!ro_header) {
1146
bpf_jit_uncharge_modmem(size);
1147
return NULL;
1148
}
1149
1150
*rw_header = kvmalloc(size, GFP_KERNEL);
1151
if (!*rw_header) {
1152
bpf_prog_pack_free(ro_header, size);
1153
bpf_jit_uncharge_modmem(size);
1154
return NULL;
1155
}
1156
1157
/* Fill space with illegal/arch-dep instructions. */
1158
bpf_fill_ill_insns(*rw_header, size);
1159
(*rw_header)->size = size;
1160
1161
hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
1162
BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
1163
start = get_random_u32_below(hole) & ~(alignment - 1);
1164
1165
*image_ptr = &ro_header->image[start];
1166
*rw_image = &(*rw_header)->image[start];
1167
1168
return ro_header;
1169
}
1170
1171
/* Copy JITed text from rw_header to its final location, the ro_header. */
1172
int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header,
1173
struct bpf_binary_header *rw_header)
1174
{
1175
void *ptr;
1176
1177
ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);
1178
1179
kvfree(rw_header);
1180
1181
if (IS_ERR(ptr)) {
1182
bpf_prog_pack_free(ro_header, ro_header->size);
1183
return PTR_ERR(ptr);
1184
}
1185
return 0;
1186
}
1187
1188
/* bpf_jit_binary_pack_free is called in two different scenarios:
1189
* 1) when the program is freed after the JIT has completed;
1190
* 2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
1191
* For case 2), we need to free both the RO memory and the RW buffer.
1192
*
1193
* bpf_jit_binary_pack_free requires proper ro_header->size. However,
1194
* bpf_jit_binary_pack_alloc does not set it. Therefore, ro_header->size
1195
* must be set with either bpf_jit_binary_pack_finalize (normal path) or
1196
* bpf_arch_text_copy (when jit fails).
1197
*/
1198
void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
1199
struct bpf_binary_header *rw_header)
1200
{
1201
u32 size = ro_header->size;
1202
1203
bpf_prog_pack_free(ro_header, size);
1204
kvfree(rw_header);
1205
bpf_jit_uncharge_modmem(size);
1206
}
1207
1208
struct bpf_binary_header *
1209
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp)
1210
{
1211
unsigned long real_start = (unsigned long)fp->bpf_func;
1212
unsigned long addr;
1213
1214
addr = real_start & BPF_PROG_CHUNK_MASK;
1215
return (void *)addr;
1216
}
1217
1218
static inline struct bpf_binary_header *
1219
bpf_jit_binary_hdr(const struct bpf_prog *fp)
1220
{
1221
unsigned long real_start = (unsigned long)fp->bpf_func;
1222
unsigned long addr;
1223
1224
addr = real_start & PAGE_MASK;
1225
return (void *)addr;
1226
}
1227
1228
/* This symbol is only overridden by archs that have different
1229
* requirements than the usual eBPF JITs, f.e. when they only
1230
* implement cBPF JIT, do not set images read-only, etc.
1231
*/
1232
void __weak bpf_jit_free(struct bpf_prog *fp)
1233
{
1234
if (fp->jited) {
1235
struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
1236
1237
bpf_jit_binary_free(hdr);
1238
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
1239
}
1240
1241
bpf_prog_unlock_free(fp);
1242
}
1243
1244
int bpf_jit_get_func_addr(const struct bpf_prog *prog,
1245
const struct bpf_insn *insn, bool extra_pass,
1246
u64 *func_addr, bool *func_addr_fixed)
1247
{
1248
s16 off = insn->off;
1249
s32 imm = insn->imm;
1250
u8 *addr;
1251
int err;
1252
1253
*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
1254
if (!*func_addr_fixed) {
1255
/* Place-holder address till the last pass has collected
1256
* all addresses for JITed subprograms in which case we
1257
* can pick them up from prog->aux.
1258
*/
1259
if (!extra_pass)
1260
addr = NULL;
1261
else if (prog->aux->func &&
1262
off >= 0 && off < prog->aux->real_func_cnt)
1263
addr = (u8 *)prog->aux->func[off]->bpf_func;
1264
else
1265
return -EINVAL;
1266
} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
1267
bpf_jit_supports_far_kfunc_call()) {
1268
err = bpf_get_kfunc_addr(prog, insn->imm, insn->off, &addr);
1269
if (err)
1270
return err;
1271
} else {
1272
/* Address of a BPF helper call. Since part of the core
1273
* kernel, it's always at a fixed location. __bpf_call_base
1274
* and the helper with imm relative to it are both in core
1275
* kernel.
1276
*/
1277
addr = (u8 *)__bpf_call_base + imm;
1278
}
1279
1280
*func_addr = (unsigned long)addr;
1281
return 0;
1282
}
1283
1284
const char *bpf_jit_get_prog_name(struct bpf_prog *prog)
1285
{
1286
if (prog->aux->ksym.prog)
1287
return prog->aux->ksym.name;
1288
return prog->aux->name;
1289
}
1290
1291
static int bpf_jit_blind_insn(const struct bpf_insn *from,
1292
const struct bpf_insn *aux,
1293
struct bpf_insn *to_buff,
1294
bool emit_zext)
1295
{
1296
struct bpf_insn *to = to_buff;
1297
u32 imm_rnd = get_random_u32();
1298
s16 off;
1299
1300
BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
1301
BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
1302
1303
/* Constraints on AX register:
1304
*
1305
* AX register is inaccessible from user space. It is mapped in
1306
* all JITs, and used here for constant blinding rewrites. It is
1307
* typically "stateless" meaning its contents are only valid within
1308
* the executed instruction, but not across several instructions.
1309
* There are a few exceptions however which are further detailed
1310
* below.
1311
*
1312
* Constant blinding is only used by JITs, not in the interpreter.
1313
* The interpreter uses AX in some occasions as a local temporary
1314
* register e.g. in DIV or MOD instructions.
1315
*
1316
* In restricted circumstances, the verifier can also use the AX
1317
* register for rewrites as long as they do not interfere with
1318
* the above cases!
1319
*/
1320
if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
1321
goto out;
1322
1323
if (from->imm == 0 &&
1324
(from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
1325
from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
1326
*to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
1327
goto out;
1328
}
1329
1330
switch (from->code) {
1331
case BPF_ALU | BPF_ADD | BPF_K:
1332
case BPF_ALU | BPF_SUB | BPF_K:
1333
case BPF_ALU | BPF_AND | BPF_K:
1334
case BPF_ALU | BPF_OR | BPF_K:
1335
case BPF_ALU | BPF_XOR | BPF_K:
1336
case BPF_ALU | BPF_MUL | BPF_K:
1337
case BPF_ALU | BPF_MOV | BPF_K:
1338
case BPF_ALU | BPF_DIV | BPF_K:
1339
case BPF_ALU | BPF_MOD | BPF_K:
1340
*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1341
*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1342
*to++ = BPF_ALU32_REG_OFF(from->code, from->dst_reg, BPF_REG_AX, from->off);
1343
break;
1344
1345
case BPF_ALU64 | BPF_ADD | BPF_K:
1346
case BPF_ALU64 | BPF_SUB | BPF_K:
1347
case BPF_ALU64 | BPF_AND | BPF_K:
1348
case BPF_ALU64 | BPF_OR | BPF_K:
1349
case BPF_ALU64 | BPF_XOR | BPF_K:
1350
case BPF_ALU64 | BPF_MUL | BPF_K:
1351
case BPF_ALU64 | BPF_MOV | BPF_K:
1352
case BPF_ALU64 | BPF_DIV | BPF_K:
1353
case BPF_ALU64 | BPF_MOD | BPF_K:
1354
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1355
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1356
*to++ = BPF_ALU64_REG_OFF(from->code, from->dst_reg, BPF_REG_AX, from->off);
1357
break;
1358
1359
case BPF_JMP | BPF_JEQ | BPF_K:
1360
case BPF_JMP | BPF_JNE | BPF_K:
1361
case BPF_JMP | BPF_JGT | BPF_K:
1362
case BPF_JMP | BPF_JLT | BPF_K:
1363
case BPF_JMP | BPF_JGE | BPF_K:
1364
case BPF_JMP | BPF_JLE | BPF_K:
1365
case BPF_JMP | BPF_JSGT | BPF_K:
1366
case BPF_JMP | BPF_JSLT | BPF_K:
1367
case BPF_JMP | BPF_JSGE | BPF_K:
1368
case BPF_JMP | BPF_JSLE | BPF_K:
1369
case BPF_JMP | BPF_JSET | BPF_K:
1370
/* Accommodate for extra offset in case of a backjump. */
1371
off = from->off;
1372
if (off < 0)
1373
off -= 2;
1374
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1375
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1376
*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
1377
break;
1378
1379
case BPF_JMP32 | BPF_JEQ | BPF_K:
1380
case BPF_JMP32 | BPF_JNE | BPF_K:
1381
case BPF_JMP32 | BPF_JGT | BPF_K:
1382
case BPF_JMP32 | BPF_JLT | BPF_K:
1383
case BPF_JMP32 | BPF_JGE | BPF_K:
1384
case BPF_JMP32 | BPF_JLE | BPF_K:
1385
case BPF_JMP32 | BPF_JSGT | BPF_K:
1386
case BPF_JMP32 | BPF_JSLT | BPF_K:
1387
case BPF_JMP32 | BPF_JSGE | BPF_K:
1388
case BPF_JMP32 | BPF_JSLE | BPF_K:
1389
case BPF_JMP32 | BPF_JSET | BPF_K:
1390
/* Accommodate for extra offset in case of a backjump. */
1391
off = from->off;
1392
if (off < 0)
1393
off -= 2;
1394
*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1395
*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1396
*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
1397
off);
1398
break;
1399
1400
case BPF_LD | BPF_IMM | BPF_DW:
1401
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
1402
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1403
*to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
1404
*to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
1405
break;
1406
case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
1407
*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
1408
*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1409
if (emit_zext)
1410
*to++ = BPF_ZEXT_REG(BPF_REG_AX);
1411
*to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX);
1412
break;
1413
1414
case BPF_ST | BPF_MEM | BPF_DW:
1415
case BPF_ST | BPF_MEM | BPF_W:
1416
case BPF_ST | BPF_MEM | BPF_H:
1417
case BPF_ST | BPF_MEM | BPF_B:
1418
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1419
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1420
*to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
1421
break;
1422
}
1423
out:
1424
return to - to_buff;
1425
}
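/* Editorial note: an illustrative example of the blinding rewrite above
 * (values are made up). For BPF_ALU64_IMM(BPF_ADD, R1, 42) and a random
 * imm_rnd, the single insn becomes:
 *   BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ 42);
 *   BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
 *   BPF_ALU64_REG(BPF_ADD, R1, BPF_REG_AX);
 * so the user-supplied constant 42 never appears verbatim in the image.
 */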
1426
1427
static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
1428
gfp_t gfp_extra_flags)
1429
{
1430
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
1431
struct bpf_prog *fp;
1432
1433
fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
1434
if (fp != NULL) {
1435
/* aux->prog still points to the fp_other one, so
1436
* when promoting the clone to the real program,
1437
* this still needs to be adapted.
1438
*/
1439
memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE);
1440
}
1441
1442
return fp;
1443
}
1444
1445
static void bpf_prog_clone_free(struct bpf_prog *fp)
1446
{
1447
/* aux was stolen by the other clone, so we cannot free
1448
* it from this path! It will be freed eventually by the
1449
* other program on release.
1450
*
1451
* At this point, we don't need a deferred release since
1452
* clone is guaranteed to not be locked.
1453
*/
1454
fp->aux = NULL;
1455
fp->stats = NULL;
1456
fp->active = NULL;
1457
__bpf_prog_free(fp);
1458
}
1459
1460
void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
1461
{
1462
/* We have to repoint aux->prog to self, as we don't
1463
* know whether fp here is the clone or the original.
1464
*/
1465
fp->aux->prog = fp;
1466
bpf_prog_clone_free(fp_other);
1467
}
1468
1469
static void adjust_insn_arrays(struct bpf_prog *prog, u32 off, u32 len)
1470
{
1471
#ifdef CONFIG_BPF_SYSCALL
1472
struct bpf_map *map;
1473
int i;
1474
1475
if (len <= 1)
1476
return;
1477
1478
for (i = 0; i < prog->aux->used_map_cnt; i++) {
1479
map = prog->aux->used_maps[i];
1480
if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
1481
bpf_insn_array_adjust(map, off, len);
1482
}
1483
#endif
1484
}
1485
1486
struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
1487
{
1488
struct bpf_insn insn_buff[16], aux[2];
1489
struct bpf_prog *clone, *tmp;
1490
int insn_delta, insn_cnt;
1491
struct bpf_insn *insn;
1492
int i, rewritten;
1493
1494
if (!prog->blinding_requested || prog->blinded)
1495
return prog;
1496
1497
clone = bpf_prog_clone_create(prog, GFP_USER);
1498
if (!clone)
1499
return ERR_PTR(-ENOMEM);
1500
1501
insn_cnt = clone->len;
1502
insn = clone->insnsi;
1503
1504
for (i = 0; i < insn_cnt; i++, insn++) {
1505
if (bpf_pseudo_func(insn)) {
1506
/* ld_imm64 with an address of bpf subprog is not
1507
* a user controlled constant. Don't randomize it,
1508
* since it will conflict with jit_subprogs() logic.
1509
*/
1510
insn++;
1511
i++;
1512
continue;
1513
}
1514
1515
/* We temporarily need to hold the original ld64 insn
1516
* so that we can still access the first part in the
1517
* second blinding run.
1518
*/
1519
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
1520
insn[1].code == 0)
1521
memcpy(aux, insn, sizeof(aux));
1522
1523
rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
1524
clone->aux->verifier_zext);
1525
if (!rewritten)
1526
continue;
1527
1528
tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
1529
if (IS_ERR(tmp)) {
1530
/* Patching may have repointed aux->prog during
1531
* realloc from the original one, so we need to
1532
* fix it up here on error.
1533
*/
1534
bpf_jit_prog_release_other(prog, clone);
1535
return tmp;
1536
}
1537
1538
clone = tmp;
1539
insn_delta = rewritten - 1;
1540
1541
/* Instructions arrays must be updated using absolute xlated offsets */
1542
adjust_insn_arrays(clone, prog->aux->subprog_start + i, rewritten);
1543
1544
/* Walk new program and skip insns we just inserted. */
1545
insn = clone->insnsi + i + insn_delta;
1546
insn_cnt += insn_delta;
1547
i += insn_delta;
1548
}
1549
1550
clone->blinded = 1;
1551
return clone;
1552
}
1553
#endif /* CONFIG_BPF_JIT */
1554
1555
/* Base function for offset calculation. Needs to go into .text section,
1556
* therefore keeping it non-static as well; will also be used by JITs
1557
* anyway later on, so do not let the compiler omit it. This also needs
1558
* to go into kallsyms for correlation from e.g. bpftool, so naming
1559
* must not change.
1560
*/
1561
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1562
{
1563
return 0;
1564
}
1565
EXPORT_SYMBOL_GPL(__bpf_call_base);
1566
1567
/* All UAPI available opcodes. */
1568
#define BPF_INSN_MAP(INSN_2, INSN_3) \
1569
/* 32 bit ALU operations. */ \
1570
/* Register based. */ \
1571
INSN_3(ALU, ADD, X), \
1572
INSN_3(ALU, SUB, X), \
1573
INSN_3(ALU, AND, X), \
1574
INSN_3(ALU, OR, X), \
1575
INSN_3(ALU, LSH, X), \
1576
INSN_3(ALU, RSH, X), \
1577
INSN_3(ALU, XOR, X), \
1578
INSN_3(ALU, MUL, X), \
1579
INSN_3(ALU, MOV, X), \
1580
INSN_3(ALU, ARSH, X), \
1581
INSN_3(ALU, DIV, X), \
1582
INSN_3(ALU, MOD, X), \
1583
INSN_2(ALU, NEG), \
1584
INSN_3(ALU, END, TO_BE), \
1585
INSN_3(ALU, END, TO_LE), \
1586
/* Immediate based. */ \
1587
INSN_3(ALU, ADD, K), \
1588
INSN_3(ALU, SUB, K), \
1589
INSN_3(ALU, AND, K), \
1590
INSN_3(ALU, OR, K), \
1591
INSN_3(ALU, LSH, K), \
1592
INSN_3(ALU, RSH, K), \
1593
INSN_3(ALU, XOR, K), \
1594
INSN_3(ALU, MUL, K), \
1595
INSN_3(ALU, MOV, K), \
1596
INSN_3(ALU, ARSH, K), \
1597
INSN_3(ALU, DIV, K), \
1598
INSN_3(ALU, MOD, K), \
1599
/* 64 bit ALU operations. */ \
1600
/* Register based. */ \
1601
INSN_3(ALU64, ADD, X), \
1602
INSN_3(ALU64, SUB, X), \
1603
INSN_3(ALU64, AND, X), \
1604
INSN_3(ALU64, OR, X), \
1605
INSN_3(ALU64, LSH, X), \
1606
INSN_3(ALU64, RSH, X), \
1607
INSN_3(ALU64, XOR, X), \
1608
INSN_3(ALU64, MUL, X), \
1609
INSN_3(ALU64, MOV, X), \
1610
INSN_3(ALU64, ARSH, X), \
1611
INSN_3(ALU64, DIV, X), \
1612
INSN_3(ALU64, MOD, X), \
1613
INSN_2(ALU64, NEG), \
1614
INSN_3(ALU64, END, TO_LE), \
1615
/* Immediate based. */ \
1616
INSN_3(ALU64, ADD, K), \
1617
INSN_3(ALU64, SUB, K), \
1618
INSN_3(ALU64, AND, K), \
1619
INSN_3(ALU64, OR, K), \
1620
INSN_3(ALU64, LSH, K), \
1621
INSN_3(ALU64, RSH, K), \
1622
INSN_3(ALU64, XOR, K), \
1623
INSN_3(ALU64, MUL, K), \
1624
INSN_3(ALU64, MOV, K), \
1625
INSN_3(ALU64, ARSH, K), \
1626
INSN_3(ALU64, DIV, K), \
1627
INSN_3(ALU64, MOD, K), \
1628
/* Call instruction. */ \
1629
INSN_2(JMP, CALL), \
1630
/* Exit instruction. */ \
1631
INSN_2(JMP, EXIT), \
1632
/* 32-bit Jump instructions. */ \
1633
/* Register based. */ \
1634
INSN_3(JMP32, JEQ, X), \
1635
INSN_3(JMP32, JNE, X), \
1636
INSN_3(JMP32, JGT, X), \
1637
INSN_3(JMP32, JLT, X), \
1638
INSN_3(JMP32, JGE, X), \
1639
INSN_3(JMP32, JLE, X), \
1640
INSN_3(JMP32, JSGT, X), \
1641
INSN_3(JMP32, JSLT, X), \
1642
INSN_3(JMP32, JSGE, X), \
1643
INSN_3(JMP32, JSLE, X), \
1644
INSN_3(JMP32, JSET, X), \
1645
/* Immediate based. */ \
1646
INSN_3(JMP32, JEQ, K), \
1647
INSN_3(JMP32, JNE, K), \
1648
INSN_3(JMP32, JGT, K), \
1649
INSN_3(JMP32, JLT, K), \
1650
INSN_3(JMP32, JGE, K), \
1651
INSN_3(JMP32, JLE, K), \
1652
INSN_3(JMP32, JSGT, K), \
1653
INSN_3(JMP32, JSLT, K), \
1654
INSN_3(JMP32, JSGE, K), \
1655
INSN_3(JMP32, JSLE, K), \
1656
INSN_3(JMP32, JSET, K), \
1657
/* Jump instructions. */ \
1658
/* Register based. */ \
1659
INSN_3(JMP, JEQ, X), \
1660
INSN_3(JMP, JNE, X), \
1661
INSN_3(JMP, JGT, X), \
1662
INSN_3(JMP, JLT, X), \
1663
INSN_3(JMP, JGE, X), \
1664
INSN_3(JMP, JLE, X), \
1665
INSN_3(JMP, JSGT, X), \
1666
INSN_3(JMP, JSLT, X), \
1667
INSN_3(JMP, JSGE, X), \
1668
INSN_3(JMP, JSLE, X), \
1669
INSN_3(JMP, JSET, X), \
1670
/* Immediate based. */ \
1671
INSN_3(JMP, JEQ, K), \
1672
INSN_3(JMP, JNE, K), \
1673
INSN_3(JMP, JGT, K), \
1674
INSN_3(JMP, JLT, K), \
1675
INSN_3(JMP, JGE, K), \
1676
INSN_3(JMP, JLE, K), \
1677
INSN_3(JMP, JSGT, K), \
1678
INSN_3(JMP, JSLT, K), \
1679
INSN_3(JMP, JSGE, K), \
1680
INSN_3(JMP, JSLE, K), \
1681
INSN_3(JMP, JSET, K), \
1682
INSN_2(JMP, JA), \
1683
INSN_2(JMP32, JA), \
1684
/* Atomic operations. */ \
1685
INSN_3(STX, ATOMIC, B), \
1686
INSN_3(STX, ATOMIC, H), \
1687
INSN_3(STX, ATOMIC, W), \
1688
INSN_3(STX, ATOMIC, DW), \
1689
/* Store instructions. */ \
1690
/* Register based. */ \
1691
INSN_3(STX, MEM, B), \
1692
INSN_3(STX, MEM, H), \
1693
INSN_3(STX, MEM, W), \
1694
INSN_3(STX, MEM, DW), \
1695
/* Immediate based. */ \
1696
INSN_3(ST, MEM, B), \
1697
INSN_3(ST, MEM, H), \
1698
INSN_3(ST, MEM, W), \
1699
INSN_3(ST, MEM, DW), \
1700
/* Load instructions. */ \
1701
/* Register based. */ \
1702
INSN_3(LDX, MEM, B), \
1703
INSN_3(LDX, MEM, H), \
1704
INSN_3(LDX, MEM, W), \
1705
INSN_3(LDX, MEM, DW), \
1706
INSN_3(LDX, MEMSX, B), \
1707
INSN_3(LDX, MEMSX, H), \
1708
INSN_3(LDX, MEMSX, W), \
1709
/* Immediate based. */ \
1710
INSN_3(LD, IMM, DW)
1711
1712
bool bpf_opcode_in_insntable(u8 code)
1713
{
1714
#define BPF_INSN_2_TBL(x, y) [BPF_##x | BPF_##y] = true
1715
#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
1716
static const bool public_insntable[256] = {
1717
[0 ... 255] = false,
1718
/* Now overwrite non-defaults ... */
1719
BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
1720
/* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
1721
[BPF_LD | BPF_ABS | BPF_B] = true,
1722
[BPF_LD | BPF_ABS | BPF_H] = true,
1723
[BPF_LD | BPF_ABS | BPF_W] = true,
1724
[BPF_LD | BPF_IND | BPF_B] = true,
1725
[BPF_LD | BPF_IND | BPF_H] = true,
1726
[BPF_LD | BPF_IND | BPF_W] = true,
1727
[BPF_JMP | BPF_JA | BPF_X] = true,
1728
[BPF_JMP | BPF_JCOND] = true,
1729
};
1730
#undef BPF_INSN_3_TBL
1731
#undef BPF_INSN_2_TBL
1732
return public_insntable[code];
1733
}
1734
1735
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1736
/**
1737
* ___bpf_prog_run - run eBPF program on a given context
1738
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
1739
* @insn: is the array of eBPF instructions
1740
*
1741
* Decode and execute eBPF instructions.
1742
*
1743
* Return: whatever value is in %BPF_R0 at program exit
1744
*/
1745
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
1746
{
1747
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
1748
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
1749
static const void * const jumptable[256] __annotate_jump_table = {
1750
[0 ... 255] = &&default_label,
1751
/* Now overwrite non-defaults ... */
1752
BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
1753
/* Non-UAPI available opcodes. */
1754
[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
1755
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
1756
[BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC,
1757
[BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
1758
[BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
1759
[BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
1760
[BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW,
1761
[BPF_LDX | BPF_PROBE_MEMSX | BPF_B] = &&LDX_PROBE_MEMSX_B,
1762
[BPF_LDX | BPF_PROBE_MEMSX | BPF_H] = &&LDX_PROBE_MEMSX_H,
1763
[BPF_LDX | BPF_PROBE_MEMSX | BPF_W] = &&LDX_PROBE_MEMSX_W,
1764
};
1765
#undef BPF_INSN_3_LBL
1766
#undef BPF_INSN_2_LBL
1767
u32 tail_call_cnt = 0;
1768
1769
#define CONT ({ insn++; goto select_insn; })
1770
#define CONT_JMP ({ insn++; goto select_insn; })
1771
1772
select_insn:
1773
goto *jumptable[insn->code];
1774
1775
/* Explicitly mask the register-based shift amounts with 63 or 31
1776
* to avoid undefined behavior. Normally this won't affect the
1777
* generated code, for example, in case of native 64 bit archs such
1778
* as x86-64 or arm64, the compiler is optimizing the AND away for
1779
* the interpreter. In case of JITs, each of the JIT backends compiles
1780
* the BPF shift operations to machine instructions which produce
1781
* implementation-defined results in such a case; the resulting
1782
* contents of the register may be arbitrary, but program behaviour
1783
* as a whole remains defined. In other words, in case of JIT backends,
1784
* the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
1785
*/
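/* Editorial note, a small worked example of the masking above: for an
 * ALU64 LSH with a runtime SRC value of 70, the interpreter computes
 * DST << (70 & 63) = DST << 6, keeping the shift amount in range
 * instead of invoking undefined behaviour in C.
 */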
1786
/* ALU (shifts) */
1787
#define SHT(OPCODE, OP) \
1788
ALU64_##OPCODE##_X: \
1789
DST = DST OP (SRC & 63); \
1790
CONT; \
1791
ALU_##OPCODE##_X: \
1792
DST = (u32) DST OP ((u32) SRC & 31); \
1793
CONT; \
1794
ALU64_##OPCODE##_K: \
1795
DST = DST OP IMM; \
1796
CONT; \
1797
ALU_##OPCODE##_K: \
1798
DST = (u32) DST OP (u32) IMM; \
1799
CONT;
1800
/* ALU (rest) */
1801
#define ALU(OPCODE, OP) \
1802
ALU64_##OPCODE##_X: \
1803
DST = DST OP SRC; \
1804
CONT; \
1805
ALU_##OPCODE##_X: \
1806
DST = (u32) DST OP (u32) SRC; \
1807
CONT; \
1808
ALU64_##OPCODE##_K: \
1809
DST = DST OP IMM; \
1810
CONT; \
1811
ALU_##OPCODE##_K: \
1812
DST = (u32) DST OP (u32) IMM; \
1813
CONT;
1814
ALU(ADD, +)
1815
ALU(SUB, -)
1816
ALU(AND, &)
1817
ALU(OR, |)
1818
ALU(XOR, ^)
1819
ALU(MUL, *)
1820
SHT(LSH, <<)
1821
SHT(RSH, >>)
1822
#undef SHT
1823
#undef ALU
	ALU_NEG:
		DST = (u32) -DST;
		CONT;
	ALU64_NEG:
		DST = -DST;
		CONT;
	ALU_MOV_X:
		switch (OFF) {
		case 0:
			DST = (u32) SRC;
			break;
		case 8:
			DST = (u32)(s8) SRC;
			break;
		case 16:
			DST = (u32)(s16) SRC;
			break;
		}
		CONT;
	ALU_MOV_K:
		DST = (u32) IMM;
		CONT;
	ALU64_MOV_X:
		switch (OFF) {
		case 0:
			DST = SRC;
			break;
		case 8:
			DST = (s8) SRC;
			break;
		case 16:
			DST = (s16) SRC;
			break;
		case 32:
			DST = (s32) SRC;
			break;
		}
		CONT;
	ALU64_MOV_K:
		DST = IMM;
		CONT;
	LD_IMM_DW:
		DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
		insn++;
		CONT;
	ALU_ARSH_X:
		DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
		CONT;
	ALU_ARSH_K:
		DST = (u64) (u32) (((s32) DST) >> IMM);
		CONT;
	ALU64_ARSH_X:
		(*(s64 *) &DST) >>= (SRC & 63);
		CONT;
	ALU64_ARSH_K:
		(*(s64 *) &DST) >>= IMM;
		CONT;
	ALU64_MOD_X:
		switch (OFF) {
		case 0:
			div64_u64_rem(DST, SRC, &AX);
			DST = AX;
			break;
		case 1:
			AX = div64_s64(DST, SRC);
			DST = DST - AX * SRC;
			break;
		}
		CONT;
	ALU_MOD_X:
		switch (OFF) {
		case 0:
			AX = (u32) DST;
			DST = do_div(AX, (u32) SRC);
			break;
		case 1:
			AX = abs((s32)DST);
			AX = do_div(AX, abs((s32)SRC));
			if ((s32)DST < 0)
				DST = (u32)-AX;
			else
				DST = (u32)AX;
			break;
		}
		CONT;
	ALU64_MOD_K:
		switch (OFF) {
		case 0:
			div64_u64_rem(DST, IMM, &AX);
			DST = AX;
			break;
		case 1:
			AX = div64_s64(DST, IMM);
			DST = DST - AX * IMM;
			break;
		}
		CONT;
	ALU_MOD_K:
		switch (OFF) {
		case 0:
			AX = (u32) DST;
			DST = do_div(AX, (u32) IMM);
			break;
		case 1:
			AX = abs((s32)DST);
			AX = do_div(AX, abs((s32)IMM));
			if ((s32)DST < 0)
				DST = (u32)-AX;
			else
				DST = (u32)AX;
			break;
		}
		CONT;
	ALU64_DIV_X:
		switch (OFF) {
		case 0:
			DST = div64_u64(DST, SRC);
			break;
		case 1:
			DST = div64_s64(DST, SRC);
			break;
		}
		CONT;
	ALU_DIV_X:
		switch (OFF) {
		case 0:
			AX = (u32) DST;
			do_div(AX, (u32) SRC);
			DST = (u32) AX;
			break;
		case 1:
			AX = abs((s32)DST);
			do_div(AX, abs((s32)SRC));
			if (((s32)DST < 0) == ((s32)SRC < 0))
				DST = (u32)AX;
			else
				DST = (u32)-AX;
			break;
		}
		CONT;
	ALU64_DIV_K:
		switch (OFF) {
		case 0:
			DST = div64_u64(DST, IMM);
			break;
		case 1:
			DST = div64_s64(DST, IMM);
			break;
		}
		CONT;
	ALU_DIV_K:
		switch (OFF) {
		case 0:
			AX = (u32) DST;
			do_div(AX, (u32) IMM);
			DST = (u32) AX;
			break;
		case 1:
			AX = abs((s32)DST);
			do_div(AX, abs((s32)IMM));
			if (((s32)DST < 0) == ((s32)IMM < 0))
				DST = (u32)AX;
			else
				DST = (u32)-AX;
			break;
		}
		CONT;
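
	/* Illustrative sketch (not part of the interpreter, never compiled):
	 * with OFF == 1 the 32-bit handlers above emulate signed division on
	 * top of do_div() by dividing absolute values and fixing up the sign:
	 * quotients truncate toward zero and the remainder takes the sign of
	 * the dividend. Values are hypothetical.
	 */
#if 0
	{
		s32 example_dividend = -7, example_divisor = 2;
		s32 example_quot = example_dividend / example_divisor;	/* -3, truncated toward zero */
		s32 example_rem = example_dividend % example_divisor;	/* -1, sign follows the dividend */

		(void)example_quot;
		(void)example_rem;
	}
#endif
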
	ALU_END_TO_BE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_be16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_be32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_be64(DST);
			break;
		}
		CONT;
	ALU_END_TO_LE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_le16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_le32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_le64(DST);
			break;
		}
		CONT;
	ALU64_END_TO_LE:
		switch (IMM) {
		case 16:
			DST = (__force u16) __swab16(DST);
			break;
		case 32:
			DST = (__force u32) __swab32(DST);
			break;
		case 64:
			DST = (__force u64) __swab64(DST);
			break;
		}
		CONT;
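
	/* Illustrative sketch (not part of the interpreter, never compiled):
	 * the 16- and 32-bit conversions above assign the converted value
	 * back through DST, so the upper register bits end up zeroed. The
	 * value is hypothetical and assumes a little-endian host.
	 */
#if 0
	{
		u64 example_reg = 0x1122334455667788ULL;

		/* BPF_END | BPF_TO_BE, imm == 16: only the low half-word survives */
		example_reg = (__force u16) cpu_to_be16(example_reg);	/* 0x8877 here */
	}
#endif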

	/* CALL */
	JMP_CALL:
		/* Function call scratches BPF_R1-BPF_R5 registers,
		 * preserves BPF_R6-BPF_R9, and stores return value
		 * into BPF_R0.
		 */
		BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
						       BPF_R4, BPF_R5);
		CONT;

	JMP_CALL_ARGS:
		BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
							    BPF_R3, BPF_R4,
							    BPF_R5,
							    insn + insn->off + 1);
		CONT;

	JMP_TAIL_CALL: {
		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
		struct bpf_array *array = container_of(map, struct bpf_array, map);
		struct bpf_prog *prog;
		u32 index = BPF_R3;

		if (unlikely(index >= array->map.max_entries))
			goto out;

		if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
			goto out;

		tail_call_cnt++;

		prog = READ_ONCE(array->ptrs[index]);
		if (!prog)
			goto out;

		/* ARG1 at this point is guaranteed to point to CTX from
		 * the verifier side due to the fact that the tail call is
		 * handled like a helper, that is, bpf_tail_call_proto,
		 * where arg1_type is ARG_PTR_TO_CTX.
		 */
		insn = prog->insnsi;
		goto select_insn;
out:
		CONT;
	}
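
	/* Illustrative sketch (not part of the interpreter, never compiled):
	 * the block above services the bpf_tail_call() helper, so an empty or
	 * out-of-range slot, or an exhausted MAX_TAIL_CALL_CNT budget, simply
	 * falls through to the next instruction. A hypothetical BPF-side
	 * caller (assuming libbpf's bpf_helpers.h and a BPF_MAP_TYPE_PROG_ARRAY
	 * named jmp_table) would look like:
	 */
#if 0
	SEC("xdp")
	int example_dispatch(struct xdp_md *ctx)
	{
		bpf_tail_call(ctx, &jmp_table, 0);
		/* Reached only when the tail call fell through, i.e. the
		 * "goto out" paths above.
		 */
		return XDP_PASS;
	}
#endif
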
	JMP_JA:
		insn += insn->off;
		CONT;
	JMP32_JA:
		insn += insn->imm;
		CONT;
	JMP_EXIT:
		return BPF_R0;
	/* JMP */
#define COND_JMP(SIGN, OPCODE, CMP_OP) \
	JMP_##OPCODE##_X: \
		if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) { \
			insn += insn->off; \
			CONT_JMP; \
		} \
		CONT; \
	JMP32_##OPCODE##_X: \
		if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) { \
			insn += insn->off; \
			CONT_JMP; \
		} \
		CONT; \
	JMP_##OPCODE##_K: \
		if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) { \
			insn += insn->off; \
			CONT_JMP; \
		} \
		CONT; \
	JMP32_##OPCODE##_K: \
		if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) { \
			insn += insn->off; \
			CONT_JMP; \
		} \
		CONT;
	COND_JMP(u, JEQ, ==)
	COND_JMP(u, JNE, !=)
	COND_JMP(u, JGT, >)
	COND_JMP(u, JLT, <)
	COND_JMP(u, JGE, >=)
	COND_JMP(u, JLE, <=)
	COND_JMP(u, JSET, &)
	COND_JMP(s, JSGT, >)
	COND_JMP(s, JSLT, <)
	COND_JMP(s, JSGE, >=)
	COND_JMP(s, JSLE, <=)
#undef COND_JMP
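
	/* Illustrative sketch (not part of the interpreter, never compiled):
	 * the SIGN parameter above picks unsigned (u64/u32) or signed
	 * (s64/s32) comparisons, so the same register bits can branch
	 * differently. Values are hypothetical.
	 */
#if 0
	{
		u64 example_reg = 0xffffffff;	/* -1 when viewed as s32 */
		bool example_jgt = (u32) example_reg > (u32) 1;		/* BPF_JGT (32-bit): true */
		bool example_jsgt = (s32) example_reg > (s32) 1;	/* BPF_JSGT (32-bit): false */

		(void)example_jgt;
		(void)example_jsgt;
	}
#endif
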
	/* ST, STX and LDX */
	ST_NOSPEC:
		/* Speculation barrier for mitigating Speculative Store Bypass,
		 * Bounds-Check Bypass and Type Confusion. In case of arm64, we
		 * rely on the firmware mitigation as controlled via the ssbd
		 * kernel parameter. Whenever the mitigation is enabled, it
		 * works for all of the kernel code with no need to provide any
		 * additional instructions here. In case of x86, we use 'lfence'
		 * insn for mitigation. We reuse preexisting logic from Spectre
		 * v1 mitigation that happens to produce the required code on
		 * x86 for v4 as well.
		 */
		barrier_nospec();
		CONT;
#define LDST(SIZEOP, SIZE) \
	STX_MEM_##SIZEOP: \
		*(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
		CONT; \
	ST_MEM_##SIZEOP: \
		*(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
		CONT; \
	LDX_MEM_##SIZEOP: \
		DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
		CONT; \
	LDX_PROBE_MEM_##SIZEOP: \
		bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
			(const void *)(long) (SRC + insn->off)); \
		DST = *((SIZE *)&DST); \
		CONT;

	LDST(B, u8)
	LDST(H, u16)
	LDST(W, u32)
	LDST(DW, u64)
#undef LDST

#define LDSX(SIZEOP, SIZE) \
	LDX_MEMSX_##SIZEOP: \
		DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
		CONT; \
	LDX_PROBE_MEMSX_##SIZEOP: \
		bpf_probe_read_kernel_common(&DST, sizeof(SIZE), \
			(const void *)(long) (SRC + insn->off)); \
		DST = *((SIZE *)&DST); \
		CONT;

	LDSX(B, s8)
	LDSX(H, s16)
	LDSX(W, s32)
#undef LDSX
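
	/* Illustrative sketch (not part of the interpreter, never compiled):
	 * the LDST() loads above zero-extend into the 64-bit register, while
	 * the LDSX() variants sign-extend the fetched value. The value is
	 * hypothetical.
	 */
#if 0
	{
		s8 example_byte = -5;
		u64 example_zext = *(u8 *)&example_byte;	/* BPF_LDX | BPF_MEM:   0xfb */
		u64 example_sext = *(s8 *)&example_byte;	/* BPF_LDX | BPF_MEMSX: 0xfffffffffffffffb */

		(void)example_zext;
		(void)example_sext;
	}
#endif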

#define ATOMIC_ALU_OP(BOP, KOP) \
		case BOP: \
			if (BPF_SIZE(insn->code) == BPF_W) \
				atomic_##KOP((u32) SRC, (atomic_t *)(unsigned long) \
					     (DST + insn->off)); \
			else if (BPF_SIZE(insn->code) == BPF_DW) \
				atomic64_##KOP((u64) SRC, (atomic64_t *)(unsigned long) \
					       (DST + insn->off)); \
			else \
				goto default_label; \
			break; \
		case BOP | BPF_FETCH: \
			if (BPF_SIZE(insn->code) == BPF_W) \
				SRC = (u32) atomic_fetch_##KOP( \
					(u32) SRC, \
					(atomic_t *)(unsigned long) (DST + insn->off)); \
			else if (BPF_SIZE(insn->code) == BPF_DW) \
				SRC = (u64) atomic64_fetch_##KOP( \
					(u64) SRC, \
					(atomic64_t *)(unsigned long) (DST + insn->off)); \
			else \
				goto default_label; \
			break;

	STX_ATOMIC_DW:
	STX_ATOMIC_W:
	STX_ATOMIC_H:
	STX_ATOMIC_B:
		switch (IMM) {
		/* Atomic read-modify-write instructions support only W and DW
		 * size modifiers.
		 */
		ATOMIC_ALU_OP(BPF_ADD, add)
		ATOMIC_ALU_OP(BPF_AND, and)
		ATOMIC_ALU_OP(BPF_OR, or)
		ATOMIC_ALU_OP(BPF_XOR, xor)
#undef ATOMIC_ALU_OP

		case BPF_XCHG:
			if (BPF_SIZE(insn->code) == BPF_W)
				SRC = (u32) atomic_xchg(
					(atomic_t *)(unsigned long) (DST + insn->off),
					(u32) SRC);
			else if (BPF_SIZE(insn->code) == BPF_DW)
				SRC = (u64) atomic64_xchg(
					(atomic64_t *)(unsigned long) (DST + insn->off),
					(u64) SRC);
			else
				goto default_label;
			break;
		case BPF_CMPXCHG:
			if (BPF_SIZE(insn->code) == BPF_W)
				BPF_R0 = (u32) atomic_cmpxchg(
					(atomic_t *)(unsigned long) (DST + insn->off),
					(u32) BPF_R0, (u32) SRC);
			else if (BPF_SIZE(insn->code) == BPF_DW)
				BPF_R0 = (u64) atomic64_cmpxchg(
					(atomic64_t *)(unsigned long) (DST + insn->off),
					(u64) BPF_R0, (u64) SRC);
			else
				goto default_label;
			break;
		/* Atomic load and store instructions support all size
		 * modifiers.
		 */
		case BPF_LOAD_ACQ:
			switch (BPF_SIZE(insn->code)) {
#define LOAD_ACQUIRE(SIZEOP, SIZE) \
			case BPF_##SIZEOP: \
				DST = (SIZE)smp_load_acquire( \
					(SIZE *)(unsigned long)(SRC + insn->off)); \
				break;
			LOAD_ACQUIRE(B, u8)
			LOAD_ACQUIRE(H, u16)
			LOAD_ACQUIRE(W, u32)
#ifdef CONFIG_64BIT
			LOAD_ACQUIRE(DW, u64)
#endif
#undef LOAD_ACQUIRE
			default:
				goto default_label;
			}
			break;
		case BPF_STORE_REL:
			switch (BPF_SIZE(insn->code)) {
#define STORE_RELEASE(SIZEOP, SIZE) \
			case BPF_##SIZEOP: \
				smp_store_release( \
					(SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC); \
				break;
			STORE_RELEASE(B, u8)
			STORE_RELEASE(H, u16)
			STORE_RELEASE(W, u32)
#ifdef CONFIG_64BIT
			STORE_RELEASE(DW, u64)
#endif
#undef STORE_RELEASE
			default:
				goto default_label;
			}
			break;

		default:
			goto default_label;
		}
		CONT;
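
	/* Illustrative sketch (not part of the interpreter, never compiled):
	 * plain atomic ALU ops above discard the old value, the BPF_FETCH
	 * variants return it in the source register, and BPF_CMPXCHG always
	 * leaves the old memory value in R0. Values are hypothetical.
	 */
#if 0
	{
		atomic64_t example_mem = ATOMIC64_INIT(10);
		s64 example_old;

		/* BPF_ATOMIC | BPF_ADD | BPF_FETCH on a u64 location */
		example_old = atomic64_fetch_add(5, &example_mem);	/* returns 10, memory now 15 */
		/* BPF_ATOMIC | BPF_CMPXCHG: succeeds only if memory == 15 */
		example_old = atomic64_cmpxchg(&example_mem, 15, 20);	/* returns 15, memory now 20 */

		(void)example_old;
	}
#endif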

	default_label:
		/* If we ever reach this, we have a bug somewhere. Die hard here
		 * instead of just returning 0; we could be somewhere in a subprog,
		 * so execution could continue otherwise which we do /not/ want.
		 *
		 * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable().
		 */
		pr_warn("BPF interpreter: unknown opcode %02x (imm: 0x%x)\n",
			insn->code, insn->imm);
		BUG_ON(1);
		return 0;
}

#define PROG_NAME(stack_size) __bpf_prog_run##stack_size
#define DEFINE_BPF_PROG_RUN(stack_size) \
static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
{ \
	u64 stack[stack_size / sizeof(u64)]; \
	u64 regs[MAX_BPF_EXT_REG] = {}; \
\
	kmsan_unpoison_memory(stack, sizeof(stack)); \
	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
	ARG1 = (u64) (unsigned long) ctx; \
	return ___bpf_prog_run(regs, insn); \
}

#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \
static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
				      const struct bpf_insn *insn) \
{ \
	u64 stack[stack_size / sizeof(u64)]; \
	u64 regs[MAX_BPF_EXT_REG]; \
\
	kmsan_unpoison_memory(stack, sizeof(stack)); \
	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
	BPF_R1 = r1; \
	BPF_R2 = r2; \
	BPF_R3 = r3; \
	BPF_R4 = r4; \
	BPF_R5 = r5; \
	return ___bpf_prog_run(regs, insn); \
}

#define EVAL1(FN, X) FN(X)
#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)

EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);

EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192);
EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384);
EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512);

#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),

static unsigned int (*interpreters[])(const void *ctx,
				      const struct bpf_insn *insn) = {
EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
#undef PROG_NAME_LIST
#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
static __maybe_unused
u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
			   const struct bpf_insn *insn) = {
EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
#undef PROG_NAME_LIST

#ifdef CONFIG_BPF_SYSCALL
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
{
	stack_depth = max_t(u32, stack_depth, 1);
	insn->off = (s16) insn->imm;
	insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] -
		__bpf_call_base_args;
	insn->code = BPF_JMP | BPF_CALL_ARGS;
}
#endif
#endif

static unsigned int __bpf_prog_ret0_warn(const void *ctx,
					 const struct bpf_insn *insn)
{
	/* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
	 * is not working properly, so warn about it!
	 */
	WARN_ON_ONCE(1);
	return 0;
}

static bool __bpf_prog_map_compatible(struct bpf_map *map,
				      const struct bpf_prog *fp)
{
	enum bpf_prog_type prog_type = resolve_prog_type(fp);
	struct bpf_prog_aux *aux = fp->aux;
	enum bpf_cgroup_storage_type i;
	bool ret = false;
	u64 cookie;

	if (fp->kprobe_override)
		return ret;

	spin_lock(&map->owner_lock);
	/* There's no owner yet where we could check for compatibility. */
	if (!map->owner) {
		map->owner = bpf_map_owner_alloc(map);
		if (!map->owner)
			goto err;
		map->owner->type = prog_type;
		map->owner->jited = fp->jited;
		map->owner->xdp_has_frags = aux->xdp_has_frags;
		map->owner->expected_attach_type = fp->expected_attach_type;
		map->owner->attach_func_proto = aux->attach_func_proto;
		for_each_cgroup_storage_type(i) {
			map->owner->storage_cookie[i] =
				aux->cgroup_storage[i] ?
				aux->cgroup_storage[i]->cookie : 0;
		}
		ret = true;
	} else {
		ret = map->owner->type == prog_type &&
		      map->owner->jited == fp->jited &&
		      map->owner->xdp_has_frags == aux->xdp_has_frags;
		if (ret &&
		    map->map_type == BPF_MAP_TYPE_PROG_ARRAY &&
		    map->owner->expected_attach_type != fp->expected_attach_type)
			ret = false;
		for_each_cgroup_storage_type(i) {
			if (!ret)
				break;
			cookie = aux->cgroup_storage[i] ?
				 aux->cgroup_storage[i]->cookie : 0;
			ret = map->owner->storage_cookie[i] == cookie ||
			      !cookie;
		}
		if (ret &&
		    map->owner->attach_func_proto != aux->attach_func_proto) {
			switch (prog_type) {
			case BPF_PROG_TYPE_TRACING:
			case BPF_PROG_TYPE_LSM:
			case BPF_PROG_TYPE_EXT:
			case BPF_PROG_TYPE_STRUCT_OPS:
				ret = false;
				break;
			default:
				break;
			}
		}
	}
err:
	spin_unlock(&map->owner_lock);
	return ret;
}

bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp)
{
	/* XDP programs inserted into maps are not guaranteed to run on
	 * a particular netdev (and can run outside driver context entirely
	 * in the case of devmap and cpumap). Until device checks
	 * are implemented, prohibit adding dev-bound programs to program maps.
	 */
	if (bpf_prog_is_dev_bound(fp->aux))
		return false;

	return __bpf_prog_map_compatible(map, fp);
}

static int bpf_check_tail_call(const struct bpf_prog *fp)
{
	struct bpf_prog_aux *aux = fp->aux;
	int i, ret = 0;

	mutex_lock(&aux->used_maps_mutex);
	for (i = 0; i < aux->used_map_cnt; i++) {
		struct bpf_map *map = aux->used_maps[i];

		if (!map_type_contains_progs(map))
			continue;

		if (!__bpf_prog_map_compatible(map, fp)) {
			ret = -EINVAL;
			goto out;
		}
	}

out:
	mutex_unlock(&aux->used_maps_mutex);
	return ret;
}

static bool bpf_prog_select_interpreter(struct bpf_prog *fp)
{
	bool select_interpreter = false;
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
	u32 idx = (round_up(stack_depth, 32) / 32) - 1;

	/* may_goto may cause stack size > 512, leading to idx out-of-bounds.
	 * But for non-JITed programs, we don't need bpf_func, so no bounds
	 * check needed.
	 */
	if (idx < ARRAY_SIZE(interpreters)) {
		fp->bpf_func = interpreters[idx];
		select_interpreter = true;
	} else {
		fp->bpf_func = __bpf_prog_ret0_warn;
	}
#else
	fp->bpf_func = __bpf_prog_ret0_warn;
#endif
	return select_interpreter;
}
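
/* Illustrative sketch (not part of this file, never compiled): the
 * interpreters[] table above is indexed by the program's stack depth rounded
 * up to the next multiple of 32 bytes, e.g. depths 1..32 share
 * __bpf_prog_run32 and depth 512 maps to the last slot. The helper name is
 * hypothetical.
 */
#if 0
static u32 example_interp_idx(u32 stack_depth)
{
	stack_depth = max_t(u32, stack_depth, 1);

	/* depth 1   -> idx 0  (__bpf_prog_run32)
	 * depth 200 -> idx 6  (__bpf_prog_run224)
	 * depth 512 -> idx 15 (__bpf_prog_run512)
	 */
	return (round_up(stack_depth, 32) / 32) - 1;
}
#endif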

/**
 * bpf_prog_select_runtime - select exec runtime for BPF program
 * @fp: bpf_prog populated with BPF program
 * @err: pointer to error variable
 *
 * Try to JIT eBPF program, if JIT is not available, use interpreter.
 * The BPF program will be executed via bpf_prog_run() function.
 *
 * Return: the &fp argument along with &err set to 0 for success or
 * a negative errno code on failure
 */
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
	/* In case of BPF to BPF calls, verifier did all the prep
	 * work with regards to JITing, etc.
	 */
	bool jit_needed = false;

	if (fp->bpf_func)
		goto finalize;

	if (IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) ||
	    bpf_prog_has_kfunc_call(fp))
		jit_needed = true;

	if (!bpf_prog_select_interpreter(fp))
		jit_needed = true;

	/* eBPF JITs can rewrite the program in case constant
	 * blinding is active. However, in case of error during
	 * blinding, bpf_int_jit_compile() must always return a
	 * valid program, which in this case would simply not
	 * be JITed, but falls back to the interpreter.
	 */
	if (!bpf_prog_is_offloaded(fp->aux)) {
		*err = bpf_prog_alloc_jited_linfo(fp);
		if (*err)
			return fp;

		fp = bpf_int_jit_compile(fp);
		bpf_prog_jit_attempt_done(fp);
		if (!fp->jited && jit_needed) {
			*err = -ENOTSUPP;
			return fp;
		}
	} else {
		*err = bpf_prog_offload_compile(fp);
		if (*err)
			return fp;
	}

finalize:
	*err = bpf_prog_lock_ro(fp);
	if (*err)
		return fp;

	/* The tail call compatibility check can only be done at
	 * this late stage as we need to determine, if we deal
	 * with JITed or non JITed program concatenations and not
	 * all eBPF JITs might immediately support all features.
	 */
	*err = bpf_check_tail_call(fp);

	return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
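
/* Illustrative sketch (not part of this file, never compiled): callers such
 * as the program load path hand the verified program to
 * bpf_prog_select_runtime() and treat a negative err as a failed load, even
 * though the same (possibly JIT-reallocated) fp pointer is handed back. The
 * wrapper name is hypothetical and error cleanup is elided.
 */
#if 0
static struct bpf_prog *example_finalize_prog(struct bpf_prog *fp)
{
	int err;

	fp = bpf_prog_select_runtime(fp, &err);
	if (err < 0)
		return ERR_PTR(err);	/* real callers release fp here */
	return fp;
}
#endif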

static unsigned int __bpf_prog_ret1(const void *ctx,
				    const struct bpf_insn *insn)
{
	return 1;
}

static struct bpf_prog_dummy {
	struct bpf_prog prog;
} dummy_bpf_prog = {
	.prog = {
		.bpf_func = __bpf_prog_ret1,
	},
};

struct bpf_empty_prog_array bpf_empty_prog_array = {
	.null_prog = NULL,
};
EXPORT_SYMBOL(bpf_empty_prog_array);

struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
{
	struct bpf_prog_array *p;

	if (prog_cnt)
		p = kzalloc(struct_size(p, items, prog_cnt + 1), flags);
	else
		p = &bpf_empty_prog_array.hdr;

	return p;
}

void bpf_prog_array_free(struct bpf_prog_array *progs)
{
	if (!progs || progs == &bpf_empty_prog_array.hdr)
		return;
	kfree_rcu(progs, rcu);
}

static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
{
	struct bpf_prog_array *progs;

	/* If RCU Tasks Trace grace period implies RCU grace period, there is
	 * no need to call kfree_rcu(), just call kfree() directly.
	 */
	progs = container_of(rcu, struct bpf_prog_array, rcu);
	if (rcu_trace_implies_rcu_gp())
		kfree(progs);
	else
		kfree_rcu(progs, rcu);
}

void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
{
	if (!progs || progs == &bpf_empty_prog_array.hdr)
		return;
	call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb);
}

int bpf_prog_array_length(struct bpf_prog_array *array)
{
	struct bpf_prog_array_item *item;
	u32 cnt = 0;

	for (item = array->items; item->prog; item++)
		if (item->prog != &dummy_bpf_prog.prog)
			cnt++;
	return cnt;
}

bool bpf_prog_array_is_empty(struct bpf_prog_array *array)
{
	struct bpf_prog_array_item *item;

	for (item = array->items; item->prog; item++)
		if (item->prog != &dummy_bpf_prog.prog)
			return false;
	return true;
}

static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
				     u32 *prog_ids,
				     u32 request_cnt)
{
	struct bpf_prog_array_item *item;
	int i = 0;

	for (item = array->items; item->prog; item++) {
		if (item->prog == &dummy_bpf_prog.prog)
			continue;
		prog_ids[i] = item->prog->aux->id;
		if (++i == request_cnt) {
			item++;
			break;
		}
	}

	return !!(item->prog);
}

int bpf_prog_array_copy_to_user(struct bpf_prog_array *array,
				__u32 __user *prog_ids, u32 cnt)
{
	unsigned long err = 0;
	bool nospc;
	u32 *ids;

	/* users of this function are doing:
	 * cnt = bpf_prog_array_length();
	 * if (cnt > 0)
	 *     bpf_prog_array_copy_to_user(..., cnt);
	 * so below kcalloc doesn't need extra cnt > 0 check.
	 */
	ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
	if (!ids)
		return -ENOMEM;
	nospc = bpf_prog_array_copy_core(array, ids, cnt);
	err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
	kfree(ids);
	if (err)
		return -EFAULT;
	if (nospc)
		return -ENOSPC;
	return 0;
}
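
/* Illustrative sketch (not part of this file, never compiled): the comment in
 * bpf_prog_array_copy_to_user() above assumes callers size the copy with
 * bpf_prog_array_length() first. A hypothetical query path, reduced to its
 * essentials (locking elided), would be:
 */
#if 0
static int example_query_prog_ids(struct bpf_prog_array *array,
				  u32 __user *uprog_ids, u32 __user *ucnt)
{
	u32 cnt = bpf_prog_array_length(array);

	if (copy_to_user(ucnt, &cnt, sizeof(cnt)))
		return -EFAULT;
	if (!cnt)
		return 0;
	return bpf_prog_array_copy_to_user(array, uprog_ids, cnt);
}
#endif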

void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
				struct bpf_prog *old_prog)
{
	struct bpf_prog_array_item *item;

	for (item = array->items; item->prog; item++)
		if (item->prog == old_prog) {
			WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
			break;
		}
}

/**
 * bpf_prog_array_delete_safe_at() - Replaces the program at the given
 *                                   index into the program array with
 *                                   a dummy no-op program.
 * @array: a bpf_prog_array
 * @index: the index of the program to replace
 *
 * Skips over dummy programs, by not counting them, when calculating
 * the position of the program to replace.
 *
 * Return:
 * * 0		- Success
 * * -EINVAL	- Invalid index value. Must be a non-negative integer.
 * * -ENOENT	- Index out of range
 */
int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
{
	return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
}

/**
 * bpf_prog_array_update_at() - Updates the program at the given index
 *                              into the program array.
 * @array: a bpf_prog_array
 * @index: the index of the program to update
 * @prog: the program to insert into the array
 *
 * Skips over dummy programs, by not counting them, when calculating
 * the position of the program to update.
 *
 * Return:
 * * 0		- Success
 * * -EINVAL	- Invalid index value. Must be a non-negative integer.
 * * -ENOENT	- Index out of range
 */
int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
			     struct bpf_prog *prog)
{
	struct bpf_prog_array_item *item;

	if (unlikely(index < 0))
		return -EINVAL;

	for (item = array->items; item->prog; item++) {
		if (item->prog == &dummy_bpf_prog.prog)
			continue;
		if (!index) {
			WRITE_ONCE(item->prog, prog);
			return 0;
		}
		index--;
	}
	return -ENOENT;
}

int bpf_prog_array_copy(struct bpf_prog_array *old_array,
			struct bpf_prog *exclude_prog,
			struct bpf_prog *include_prog,
			u64 bpf_cookie,
			struct bpf_prog_array **new_array)
{
	int new_prog_cnt, carry_prog_cnt = 0;
	struct bpf_prog_array_item *existing, *new;
	struct bpf_prog_array *array;
	bool found_exclude = false;

	/* Figure out how many existing progs we need to carry over to
	 * the new array.
	 */
	if (old_array) {
		existing = old_array->items;
		for (; existing->prog; existing++) {
			if (existing->prog == exclude_prog) {
				found_exclude = true;
				continue;
			}
			if (existing->prog != &dummy_bpf_prog.prog)
				carry_prog_cnt++;
			if (existing->prog == include_prog)
				return -EEXIST;
		}
	}

	if (exclude_prog && !found_exclude)
		return -ENOENT;

	/* How many progs (not NULL) will be in the new array? */
	new_prog_cnt = carry_prog_cnt;
	if (include_prog)
		new_prog_cnt += 1;

	/* Do we have any prog (not NULL) in the new array? */
	if (!new_prog_cnt) {
		*new_array = NULL;
		return 0;
	}

	/* +1 as the end of prog_array is marked with NULL */
	array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
	if (!array)
		return -ENOMEM;
	new = array->items;

	/* Fill in the new prog array */
	if (carry_prog_cnt) {
		existing = old_array->items;
		for (; existing->prog; existing++) {
			if (existing->prog == exclude_prog ||
			    existing->prog == &dummy_bpf_prog.prog)
				continue;

			new->prog = existing->prog;
			new->bpf_cookie = existing->bpf_cookie;
			new++;
		}
	}
	if (include_prog) {
		new->prog = include_prog;
		new->bpf_cookie = bpf_cookie;
		new++;
	}
	new->prog = NULL;
	*new_array = array;
	return 0;
}
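
/* Illustrative sketch (not part of this file, never compiled): attach paths
 * typically build a new array with the extra program appended and then swap
 * it in under their own lock. Names are hypothetical; RCU publication and the
 * deferred free of the old array are elided.
 */
#if 0
static int example_attach(struct bpf_prog_array **active,
			  struct bpf_prog *prog, u64 cookie)
{
	struct bpf_prog_array *new_array;
	int ret;

	ret = bpf_prog_array_copy(*active, NULL, prog, cookie, &new_array);
	if (ret < 0)
		return ret;

	/* Publish new_array; the old array is freed only after readers are
	 * done (RCU details elided here).
	 */
	*active = new_array;
	return 0;
}
#endif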

int bpf_prog_array_copy_info(struct bpf_prog_array *array,
			     u32 *prog_ids, u32 request_cnt,
			     u32 *prog_cnt)
{
	u32 cnt = 0;

	if (array)
		cnt = bpf_prog_array_length(array);

	*prog_cnt = cnt;

	/* return early if user requested only program count or nothing to copy */
	if (!request_cnt || !cnt)
		return 0;

	/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
	return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
								     : 0;
}

void __bpf_free_used_maps(struct bpf_prog_aux *aux,
			  struct bpf_map **used_maps, u32 len)
{
	struct bpf_map *map;
	bool sleepable;
	u32 i;

	sleepable = aux->prog->sleepable;
	for (i = 0; i < len; i++) {
		map = used_maps[i];
		if (map->ops->map_poke_untrack)
			map->ops->map_poke_untrack(map, aux);
		if (sleepable)
			atomic64_dec(&map->sleepable_refcnt);
		bpf_map_put(map);
	}
}

static void bpf_free_used_maps(struct bpf_prog_aux *aux)
{
	__bpf_free_used_maps(aux, aux->used_maps, aux->used_map_cnt);
	kfree(aux->used_maps);
}

void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len)
{
#ifdef CONFIG_BPF_SYSCALL
	struct btf_mod_pair *btf_mod;
	u32 i;

	for (i = 0; i < len; i++) {
		btf_mod = &used_btfs[i];
		if (btf_mod->module)
			module_put(btf_mod->module);
		btf_put(btf_mod->btf);
	}
#endif
}

static void bpf_free_used_btfs(struct bpf_prog_aux *aux)
{
	__bpf_free_used_btfs(aux->used_btfs, aux->used_btf_cnt);
	kfree(aux->used_btfs);
}

static void bpf_prog_free_deferred(struct work_struct *work)
{
	struct bpf_prog_aux *aux;
	int i;

	aux = container_of(work, struct bpf_prog_aux, work);
#ifdef CONFIG_BPF_SYSCALL
	bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab);
	bpf_prog_stream_free(aux->prog);
#endif
#ifdef CONFIG_CGROUP_BPF
	if (aux->cgroup_atype != CGROUP_BPF_ATTACH_TYPE_INVALID)
		bpf_cgroup_atype_put(aux->cgroup_atype);
#endif
	bpf_free_used_maps(aux);
	bpf_free_used_btfs(aux);
	if (bpf_prog_is_dev_bound(aux))
		bpf_prog_dev_bound_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS
	if (aux->prog->has_callchain_buf)
		put_callchain_buffers();
#endif
	if (aux->dst_trampoline)
		bpf_trampoline_put(aux->dst_trampoline);
	for (i = 0; i < aux->real_func_cnt; i++) {
		/* We can just unlink the subprog poke descriptor table as
		 * it was originally linked to the main program and is also
		 * released along with it.
		 */
		aux->func[i]->aux->poke_tab = NULL;
		bpf_jit_free(aux->func[i]);
	}
	if (aux->real_func_cnt) {
		kfree(aux->func);
		bpf_prog_unlock_free(aux->prog);
	} else {
		bpf_jit_free(aux->prog);
	}
}

void bpf_prog_free(struct bpf_prog *fp)
{
	struct bpf_prog_aux *aux = fp->aux;

	if (aux->dst_prog)
		bpf_prog_put(aux->dst_prog);
	bpf_token_put(aux->token);
	INIT_WORK(&aux->work, bpf_prog_free_deferred);
	schedule_work(&aux->work);
}
EXPORT_SYMBOL_GPL(bpf_prog_free);

/* RNG for unprivileged user space with separated state from prandom_u32(). */
static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);

void bpf_user_rnd_init_once(void)
{
	prandom_init_once(&bpf_user_rnd_state);
}

BPF_CALL_0(bpf_user_rnd_u32)
{
	/* Should someone ever have the rather unwise idea to use some
	 * of the registers passed into this function, then note that
	 * this function is called from native eBPF and classic-to-eBPF
	 * transformations. Register assignments from both sides are
	 * different, f.e. classic always sets fn(ctx, A, X) here.
	 */
	struct rnd_state *state;
	u32 res;

	state = &get_cpu_var(bpf_user_rnd_state);
	res = prandom_u32_state(state);
	put_cpu_var(bpf_user_rnd_state);

	return res;
}

BPF_CALL_0(bpf_get_raw_cpu_id)
{
	return raw_smp_processor_id();
}

/* Weak definitions of helper functions in case we don't have bpf syscall. */
const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
const struct bpf_func_proto bpf_map_push_elem_proto __weak;
const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak;
const struct bpf_func_proto bpf_spin_lock_proto __weak;
const struct bpf_func_proto bpf_spin_unlock_proto __weak;
const struct bpf_func_proto bpf_jiffies64_proto __weak;

const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_tai_ns_proto __weak;

const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
const struct bpf_func_proto bpf_set_retval_proto __weak;
const struct bpf_func_proto bpf_get_retval_proto __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
	return NULL;
}

const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
{
	return NULL;
}

const struct bpf_func_proto * __weak bpf_get_perf_event_read_value_proto(void)
{
	return NULL;
}

u64 __weak
bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	return -ENOTSUPP;
}
EXPORT_SYMBOL_GPL(bpf_event_output);

/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
	/* func is unused for tail_call, we set it to pass the
	 * get_helper_proto check
	 */
	.func = BPF_PTR_POISON,
	.gpl_only = false,
	.ret_type = RET_VOID,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_CONST_MAP_PTR,
	.arg3_type = ARG_ANYTHING,
};

/* Stub for JITs that only support cBPF. eBPF programs are interpreted.
 * It is encouraged to implement bpf_int_jit_compile() instead, so that
 * eBPF and implicitly also cBPF can get JITed!
 */
struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
{
	return prog;
}

/* Stub for JITs that support eBPF. All cBPF code gets transformed into
 * eBPF by the kernel and is later compiled by bpf_int_jit_compile().
 */
void __weak bpf_jit_compile(struct bpf_prog *prog)
{
}

bool __weak bpf_helper_changes_pkt_data(enum bpf_func_id func_id)
{
	return false;
}

/* Return TRUE if the JIT backend wants verifier to enable sub-register usage
 * analysis code and wants explicit zero extension inserted by verifier.
 * Otherwise, return FALSE.
 *
 * The verifier inserts an explicit zero extension after BPF_CMPXCHGs even if
 * you don't override this. JITs that don't want these extra insns can detect
 * them using insn_is_zext.
 */
bool __weak bpf_jit_needs_zext(void)
{
	return false;
}

/* By default, enable the verifier's mitigations against Spectre v1 and v4 for
 * all archs. The value returned must not change at runtime as there is
 * currently no support for reloading programs that were loaded without
 * mitigations.
 */
bool __weak bpf_jit_bypass_spec_v1(void)
{
	return false;
}

bool __weak bpf_jit_bypass_spec_v4(void)
{
	return false;
}

/* Return true if the JIT inlines the call to the helper corresponding to
 * the imm.
 *
 * The verifier will not patch the insn->imm for the call to the helper if
 * this returns true.
 */
bool __weak bpf_jit_inlines_helper_call(s32 imm)
{
	return false;
}

/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
bool __weak bpf_jit_supports_subprog_tailcalls(void)
{
	return false;
}

bool __weak bpf_jit_supports_percpu_insn(void)
{
	return false;
}

bool __weak bpf_jit_supports_kfunc_call(void)
{
	return false;
}

bool __weak bpf_jit_supports_far_kfunc_call(void)
{
	return false;
}

bool __weak bpf_jit_supports_arena(void)
{
	return false;
}

bool __weak bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
	return false;
}

u64 __weak bpf_arch_uaddress_limit(void)
{
#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE)
	return TASK_SIZE;
#else
	return 0;
#endif
}

/* Return TRUE if the JIT backend satisfies the following two conditions:
 * 1) JIT backend supports atomic_xchg() on pointer-sized words.
 * 2) Under the specific arch, the implementation of xchg() is the same
 *    as atomic_xchg() on pointer-sized words.
 */
bool __weak bpf_jit_supports_ptr_xchg(void)
{
	return false;
}

/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
 */
int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
			 int len)
{
	return -EFAULT;
}

int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
			      enum bpf_text_poke_type new_t, void *old_addr,
			      void *new_addr)
{
	return -ENOTSUPP;
}

void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	return ERR_PTR(-ENOTSUPP);
}

int __weak bpf_arch_text_invalidate(void *dst, size_t len)
{
	return -ENOTSUPP;
}

bool __weak bpf_jit_supports_exceptions(void)
{
	return false;
}

bool __weak bpf_jit_supports_private_stack(void)
{
	return false;
}

void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
{
}

bool __weak bpf_jit_supports_timed_may_goto(void)
{
	return false;
}

u64 __weak arch_bpf_timed_may_goto(void)
{
	return 0;
}

static noinline void bpf_prog_report_may_goto_violation(void)
{
#ifdef CONFIG_BPF_SYSCALL
	struct bpf_stream_stage ss;
	struct bpf_prog *prog;

	prog = bpf_prog_find_from_stack();
	if (!prog)
		return;
	bpf_stream_stage(ss, prog, BPF_STDERR, ({
		bpf_stream_printk(ss, "ERROR: Timeout detected for may_goto instruction\n");
		bpf_stream_dump_stack(ss);
	}));
#endif
}

u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *p)
{
	u64 time = ktime_get_mono_fast_ns();

	/* Populate the timestamp for this stack frame, and refresh count. */
	if (!p->timestamp) {
		p->timestamp = time;
		return BPF_MAX_TIMED_LOOPS;
	}
	/* Check if we've exhausted our time slice, and zero count. */
	if (unlikely(time - p->timestamp >= (NSEC_PER_SEC / 4))) {
		bpf_prog_report_may_goto_violation();
		return 0;
	}
	/* Refresh the count for the stack frame. */
	return BPF_MAX_TIMED_LOOPS;
}
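
/* Illustrative sketch (not part of this file, never compiled): the helper
 * above refills the loop budget kept in a per-frame struct bpf_timed_may_goto;
 * a hypothetical wrapper in the arch glue would simply be:
 */
#if 0
static u64 example_refill_may_goto_budget(struct bpf_timed_may_goto *p)
{
	/* Returns BPF_MAX_TIMED_LOOPS until roughly a quarter second has
	 * elapsed for this stack frame, then 0 so the program takes the
	 * may_goto exit path.
	 */
	return bpf_check_timed_may_goto(p);
}
#endif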

/* for configs without MMU or 32-bit */
__weak const struct bpf_map_ops arena_map_ops;
__weak u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena)
{
	return 0;
}
__weak u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena)
{
	return 0;
}

#ifdef CONFIG_BPF_SYSCALL
static int __init bpf_global_ma_init(void)
{
	int ret;

	ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
	bpf_global_ma_set = !ret;
	return ret;
}
late_initcall(bpf_global_ma_init);
#endif

DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);

/* All definitions of tracepoints related to BPF. */
#define CREATE_TRACE_POINTS
#include <linux/bpf_trace.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);

#ifdef CONFIG_BPF_SYSCALL

int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
			   const char **linep, int *nump)
{
	int idx = -1, insn_start, insn_end, len;
	struct bpf_line_info *linfo;
	void **jited_linfo;
	struct btf *btf;
	int nr_linfo;

	btf = prog->aux->btf;
	linfo = prog->aux->linfo;
	jited_linfo = prog->aux->jited_linfo;

	if (!btf || !linfo || !jited_linfo)
		return -EINVAL;
	len = prog->aux->func ? prog->aux->func[prog->aux->func_idx]->len : prog->len;

	linfo = &prog->aux->linfo[prog->aux->linfo_idx];
	jited_linfo = &prog->aux->jited_linfo[prog->aux->linfo_idx];

	insn_start = linfo[0].insn_off;
	insn_end = insn_start + len;
	nr_linfo = prog->aux->nr_linfo - prog->aux->linfo_idx;

	for (int i = 0; i < nr_linfo &&
	     linfo[i].insn_off >= insn_start && linfo[i].insn_off < insn_end; i++) {
		if (jited_linfo[i] >= (void *)ip)
			break;
		idx = i;
	}

	if (idx == -1)
		return -ENOENT;

	/* Get base component of the file path. */
	*filep = btf_name_by_offset(btf, linfo[idx].file_name_off);
	*filep = kbasename(*filep);
	/* Obtain the source line, and strip whitespace in prefix. */
	*linep = btf_name_by_offset(btf, linfo[idx].line_off);
	while (isspace(**linep))
		*linep += 1;
	*nump = BPF_LINE_INFO_LINE_NUM(linfo[idx].line_col);
	return 0;
}

struct walk_stack_ctx {
	struct bpf_prog *prog;
};

static bool find_from_stack_cb(void *cookie, u64 ip, u64 sp, u64 bp)
{
	struct walk_stack_ctx *ctxp = cookie;
	struct bpf_prog *prog;

	/*
	 * The RCU read lock is held to safely traverse the latch tree, but we
	 * don't need its protection when accessing the prog, since it has an
	 * active stack frame on the current stack trace, and won't disappear.
	 */
	rcu_read_lock();
	prog = bpf_prog_ksym_find(ip);
	rcu_read_unlock();
	if (!prog)
		return true;
	/* Make sure we return the main prog if we found a subprog */
	ctxp->prog = prog->aux->main_prog_aux->prog;
	return false;
}

struct bpf_prog *bpf_prog_find_from_stack(void)
{
	struct walk_stack_ctx ctx = {};

	arch_bpf_stack_walk(find_from_stack_cb, &ctx);
	return ctx.prog;
}

#endif