GitHub Repository: torvalds/linux
Path: kernel/bpf/fixups.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
3
#include <linux/bpf.h>
4
#include <linux/btf.h>
5
#include <linux/bpf_verifier.h>
6
#include <linux/filter.h>
7
#include <linux/vmalloc.h>
8
#include <linux/bsearch.h>
9
#include <linux/sort.h>
10
#include <linux/perf_event.h>
11
#include <net/xdp.h>
12
#include "disasm.h"
13
14
#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
15
16
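/* True if INSN is an atomic compare-and-exchange, i.e. BPF_STX | BPF_ATOMIC
* with imm == BPF_CMPXCHG.
*/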
static bool is_cmpxchg_insn(const struct bpf_insn *insn)
17
{
18
return BPF_CLASS(insn->code) == BPF_STX &&
19
BPF_MODE(insn->code) == BPF_ATOMIC &&
20
insn->imm == BPF_CMPXCHG;
21
}
22
23
/* Return the regno defined by the insn, or -1. */
24
static int insn_def_regno(const struct bpf_insn *insn)
25
{
26
switch (BPF_CLASS(insn->code)) {
27
case BPF_JMP:
28
case BPF_JMP32:
29
case BPF_ST:
30
return -1;
31
case BPF_STX:
32
if (BPF_MODE(insn->code) == BPF_ATOMIC ||
33
BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
34
if (insn->imm == BPF_CMPXCHG)
35
return BPF_REG_0;
36
else if (insn->imm == BPF_LOAD_ACQ)
37
return insn->dst_reg;
38
else if (insn->imm & BPF_FETCH)
39
return insn->src_reg;
40
}
41
return -1;
42
default:
43
return insn->dst_reg;
44
}
45
}
46
47
/* Return TRUE if INSN has defined any 32-bit value explicitly. */
48
static bool insn_has_def32(struct bpf_insn *insn)
49
{
50
int dst_reg = insn_def_regno(insn);
51
52
if (dst_reg == -1)
53
return false;
54
55
return !bpf_is_reg64(insn, dst_reg, NULL, DST_OP);
56
}
57
58
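/* Ordering used for both sort() and bsearch() of the kfunc descriptor table:
* primary key is ->imm, secondary key is ->offset.
*/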
static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
59
{
60
const struct bpf_kfunc_desc *d0 = a;
61
const struct bpf_kfunc_desc *d1 = b;
62
63
if (d0->imm != d1->imm)
64
return d0->imm < d1->imm ? -1 : 1;
65
if (d0->offset != d1->offset)
66
return d0->offset < d1->offset ? -1 : 1;
67
return 0;
68
}
69
70
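/* Find the BTF function model for a kfunc call instruction by binary-searching
* the program's kfunc descriptor table, which is sorted by (imm, offset).
*/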
const struct btf_func_model *
71
bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
72
const struct bpf_insn *insn)
73
{
74
const struct bpf_kfunc_desc desc = {
75
.imm = insn->imm,
76
.offset = insn->off,
77
};
78
const struct bpf_kfunc_desc *res;
79
struct bpf_kfunc_desc_tab *tab;
80
81
tab = prog->aux->kfunc_tab;
82
res = bsearch(&desc, tab->descs, tab->nr_descs,
83
sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
84
85
return res ? &res->func_model : NULL;
86
}
87
88
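/* Compute desc->imm for a kfunc call: JITs that support far kfunc calls get
* the BTF func_id directly; otherwise imm holds the kfunc address encoded
* relative to the BPF call base (BPF_CALL_IMM), which must fit in 32 bits.
*/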
static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
89
{
90
unsigned long call_imm;
91
92
if (bpf_jit_supports_far_kfunc_call()) {
93
call_imm = desc->func_id;
94
} else {
95
call_imm = BPF_CALL_IMM(desc->addr);
96
/* Check whether the relative offset overflows desc->imm */
97
if ((unsigned long)(s32)call_imm != call_imm) {
98
verbose(env, "address of kernel func_id %u is out of range\n",
99
desc->func_id);
100
return -EINVAL;
101
}
102
}
103
desc->imm = call_imm;
104
return 0;
105
}
106
107
static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
108
{
109
struct bpf_kfunc_desc_tab *tab;
110
int i, err;
111
112
tab = env->prog->aux->kfunc_tab;
113
if (!tab)
114
return 0;
115
116
for (i = 0; i < tab->nr_descs; i++) {
117
err = set_kfunc_desc_imm(env, &tab->descs[i]);
118
if (err)
119
return err;
120
}
121
122
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
123
kfunc_desc_cmp_by_imm_off, NULL);
124
return 0;
125
}
126
127
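/* Register every kfunc call found in a freshly generated instruction patch
* (e.g. a prologue or epilogue) so that a descriptor exists for it.
*/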
static int add_kfunc_in_insns(struct bpf_verifier_env *env,
128
struct bpf_insn *insn, int cnt)
129
{
130
int i, ret;
131
132
for (i = 0; i < cnt; i++, insn++) {
133
if (bpf_pseudo_kfunc_call(insn)) {
134
ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
135
if (ret < 0)
136
return ret;
137
}
138
}
139
return 0;
140
}
141
142
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
143
static int get_callee_stack_depth(struct bpf_verifier_env *env,
144
const struct bpf_insn *insn, int idx)
145
{
146
int start = idx + insn->imm + 1, subprog;
147
148
subprog = bpf_find_subprog(env, start);
149
if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
150
return -EFAULT;
151
return env->subprog_info[subprog].stack_depth;
152
}
153
#endif
154
155
/* single env->prog->insnsi[off] instruction was replaced with the range
156
* insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
157
* [0, off) and [off, end) to new locations, so the patched range stays zero
158
*/
159
static void adjust_insn_aux_data(struct bpf_verifier_env *env,
160
struct bpf_prog *new_prog, u32 off, u32 cnt)
161
{
162
struct bpf_insn_aux_data *data = env->insn_aux_data;
163
struct bpf_insn *insn = new_prog->insnsi;
164
u32 old_seen = data[off].seen;
165
u32 prog_len;
166
int i;
167
168
/* aux info at OFF always needs adjustment, no matter whether the fast path
169
* (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
170
* original insn in the old prog.
171
*/
172
data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
173
174
if (cnt == 1)
175
return;
176
prog_len = new_prog->len;
177
178
memmove(data + off + cnt - 1, data + off,
179
sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
180
memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
181
for (i = off; i < off + cnt - 1; i++) {
182
/* Expand insni[off]'s seen count to the patched range. */
183
data[i].seen = old_seen;
184
data[i].zext_dst = insn_has_def32(insn + i);
185
}
186
187
/*
188
* The indirect_target flag of the original instruction was moved to the last of the
189
* new instructions by the above memmove and memset, but the indirect jump target is
190
* actually the first instruction, so move it back. This also matches with the behavior
191
* of bpf_insn_array_adjust(), which preserves xlated_off to point to the first new
192
* instruction.
193
*/
194
if (data[off + cnt - 1].indirect_target) {
195
data[off].indirect_target = 1;
196
data[off + cnt - 1].indirect_target = 0;
197
}
198
}
199
200
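/* A patch of 'len' insns replaced the single insn at 'off': shift the start of
* every subprog that begins after 'off' by the net growth (len - 1).
*/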
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
201
{
202
int i;
203
204
if (len == 1)
205
return;
206
/* NOTE: fake 'exit' subprog should be updated as well. */
207
for (i = 0; i <= env->subprog_cnt; i++) {
208
if (env->subprog_info[i].start <= off)
209
continue;
210
env->subprog_info[i].start += len - 1;
211
}
212
}
213
214
static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
215
{
216
int i;
217
218
if (len == 1)
219
return;
220
221
for (i = 0; i < env->insn_array_map_cnt; i++)
222
bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
223
}
224
225
static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
226
{
227
int i;
228
229
for (i = 0; i < env->insn_array_map_cnt; i++)
230
bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
231
}
232
233
static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
234
{
235
struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
236
int i, sz = prog->aux->size_poke_tab;
237
struct bpf_jit_poke_descriptor *desc;
238
239
for (i = 0; i < sz; i++) {
240
desc = &tab[i];
241
if (desc->insn_idx <= off)
242
continue;
243
desc->insn_idx += len - 1;
244
}
245
}
246
247
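/* Replace the single instruction at 'off' with the 'len' instructions in
* 'patch', growing insn_aux_data if needed and fixing up aux data, subprog
* starts, instruction arrays and tail-call poke descriptors afterwards.
* Returns the new program or NULL on failure.
*/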
struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
248
const struct bpf_insn *patch, u32 len)
249
{
250
struct bpf_prog *new_prog;
251
struct bpf_insn_aux_data *new_data = NULL;
252
253
if (len > 1) {
254
new_data = vrealloc(env->insn_aux_data,
255
array_size(env->prog->len + len - 1,
256
sizeof(struct bpf_insn_aux_data)),
257
GFP_KERNEL_ACCOUNT | __GFP_ZERO);
258
if (!new_data)
259
return NULL;
260
261
env->insn_aux_data = new_data;
262
}
263
264
new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
265
if (IS_ERR(new_prog)) {
266
if (PTR_ERR(new_prog) == -ERANGE)
267
verbose(env,
268
"insn %d cannot be patched due to 16-bit range\n",
269
env->insn_aux_data[off].orig_idx);
270
return NULL;
271
}
272
adjust_insn_aux_data(env, new_prog, off, len);
273
adjust_subprog_starts(env, off, len);
274
adjust_insn_arrays(env, off, len);
275
adjust_poke_descs(new_prog, off, len);
276
return new_prog;
277
}
278
279
/*
280
* For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
281
* jump offset by 'delta'.
282
*/
283
static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
284
{
285
struct bpf_insn *insn = prog->insnsi;
286
u32 insn_cnt = prog->len, i;
287
s32 imm;
288
s16 off;
289
290
for (i = 0; i < insn_cnt; i++, insn++) {
291
u8 code = insn->code;
292
293
if (tgt_idx <= i && i < tgt_idx + delta)
294
continue;
295
296
if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
297
BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
298
continue;
299
300
if (insn->code == (BPF_JMP32 | BPF_JA)) {
301
if (i + 1 + insn->imm != tgt_idx)
302
continue;
303
if (check_add_overflow(insn->imm, delta, &imm))
304
return -ERANGE;
305
insn->imm = imm;
306
} else {
307
if (i + 1 + insn->off != tgt_idx)
308
continue;
309
if (check_add_overflow(insn->off, delta, &off))
310
return -ERANGE;
311
insn->off = off;
312
}
313
}
314
return 0;
315
}
316
317
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
318
u32 off, u32 cnt)
319
{
320
int i, j;
321
322
/* find first prog starting at or after off (first to remove) */
323
for (i = 0; i < env->subprog_cnt; i++)
324
if (env->subprog_info[i].start >= off)
325
break;
326
/* find first prog starting at or after off + cnt (first to stay) */
327
for (j = i; j < env->subprog_cnt; j++)
328
if (env->subprog_info[j].start >= off + cnt)
329
break;
330
/* if j doesn't start exactly at off + cnt, we are just removing
331
* the front of previous prog
332
*/
333
if (env->subprog_info[j].start != off + cnt)
334
j--;
335
336
if (j > i) {
337
struct bpf_prog_aux *aux = env->prog->aux;
338
int move;
339
340
/* move fake 'exit' subprog as well */
341
move = env->subprog_cnt + 1 - j;
342
343
memmove(env->subprog_info + i,
344
env->subprog_info + j,
345
sizeof(*env->subprog_info) * move);
346
env->subprog_cnt -= j - i;
347
348
/* remove func_info */
349
if (aux->func_info) {
350
move = aux->func_info_cnt - j;
351
352
memmove(aux->func_info + i,
353
aux->func_info + j,
354
sizeof(*aux->func_info) * move);
355
aux->func_info_cnt -= j - i;
356
/* func_info->insn_off is set after all code rewrites,
357
* in adjust_btf_func() - no need to adjust
358
*/
359
}
360
} else {
361
/* convert i from "first prog to remove" to "first to adjust" */
362
if (env->subprog_info[i].start == off)
363
i++;
364
}
365
366
/* update fake 'exit' subprog as well */
367
for (; i <= env->subprog_cnt; i++)
368
env->subprog_info[i].start -= cnt;
369
370
return 0;
371
}
372
373
static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
374
u32 cnt)
375
{
376
struct bpf_prog *prog = env->prog;
377
u32 i, l_off, l_cnt, nr_linfo;
378
struct bpf_line_info *linfo;
379
380
nr_linfo = prog->aux->nr_linfo;
381
if (!nr_linfo)
382
return 0;
383
384
linfo = prog->aux->linfo;
385
386
/* find first line info to remove, count lines to be removed */
387
for (i = 0; i < nr_linfo; i++)
388
if (linfo[i].insn_off >= off)
389
break;
390
391
l_off = i;
392
l_cnt = 0;
393
for (; i < nr_linfo; i++)
394
if (linfo[i].insn_off < off + cnt)
395
l_cnt++;
396
else
397
break;
398
399
/* First live insn doesn't match first live linfo, it needs to "inherit"
400
* last removed linfo. prog is already modified, so prog->len == off
401
* means no live instructions after (tail of the program was removed).
402
*/
403
if (prog->len != off && l_cnt &&
404
(i == nr_linfo || linfo[i].insn_off != off + cnt)) {
405
l_cnt--;
406
linfo[--i].insn_off = off + cnt;
407
}
408
409
/* remove the line info which refer to the removed instructions */
410
if (l_cnt) {
411
memmove(linfo + l_off, linfo + i,
412
sizeof(*linfo) * (nr_linfo - i));
413
414
prog->aux->nr_linfo -= l_cnt;
415
nr_linfo = prog->aux->nr_linfo;
416
}
417
418
/* pull all linfo[i].insn_off >= off + cnt in by cnt */
419
for (i = l_off; i < nr_linfo; i++)
420
linfo[i].insn_off -= cnt;
421
422
/* fix up all subprogs (incl. 'exit') which start >= off */
423
for (i = 0; i <= env->subprog_cnt; i++)
424
if (env->subprog_info[i].linfo_idx > l_off) {
425
/* program may have started in the removed region but
426
* may not be fully removed
427
*/
428
if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
429
env->subprog_info[i].linfo_idx -= l_cnt;
430
else
431
env->subprog_info[i].linfo_idx = l_off;
432
}
433
434
return 0;
435
}
436
437
/*
438
* Clean up dynamically allocated fields of aux data for instructions [start, start + len)
439
*/
440
void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
441
{
442
struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
443
struct bpf_insn *insns = env->prog->insnsi;
444
int end = start + len;
445
int i;
446
447
for (i = start; i < end; i++) {
448
if (aux_data[i].jt) {
449
kvfree(aux_data[i].jt);
450
aux_data[i].jt = NULL;
451
}
452
453
if (bpf_is_ldimm64(&insns[i]))
454
i++;
455
}
456
}
457
458
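/* Remove 'cnt' instructions starting at 'off' and keep the derived state
* (insn aux data, subprog starts, line info, instruction arrays) consistent.
*/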
static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
459
{
460
struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
461
unsigned int orig_prog_len = env->prog->len;
462
int err;
463
464
if (bpf_prog_is_offloaded(env->prog->aux))
465
bpf_prog_offload_remove_insns(env, off, cnt);
466
467
/* Should be called before bpf_remove_insns, as it uses prog->insnsi */
468
bpf_clear_insn_aux_data(env, off, cnt);
469
470
err = bpf_remove_insns(env->prog, off, cnt);
471
if (err)
472
return err;
473
474
err = adjust_subprog_starts_after_remove(env, off, cnt);
475
if (err)
476
return err;
477
478
err = bpf_adj_linfo_after_remove(env, off, cnt);
479
if (err)
480
return err;
481
482
adjust_insn_arrays_after_remove(env, off, cnt);
483
484
memmove(aux_data + off, aux_data + off + cnt,
485
sizeof(*aux_data) * (orig_prog_len - off - cnt));
486
487
return 0;
488
}
489
490
static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
491
static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
492
493
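/* True for any JMP/JMP32 instruction that has a condition, i.e. everything
* except JA, EXIT and CALL.
*/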
bool bpf_insn_is_cond_jump(u8 code)
494
{
495
u8 op;
496
497
op = BPF_OP(code);
498
if (BPF_CLASS(code) == BPF_JMP32)
499
return op != BPF_JA;
500
501
if (BPF_CLASS(code) != BPF_JMP)
502
return false;
503
504
return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
505
}
506
507
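/* Turn conditional jumps with a dead arm into unconditional ones: if the
* fall-through insn was never seen, always take the branch; if the branch
* target was never seen, always fall through (JA with off 0).
*/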
void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
508
{
509
struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
510
struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
511
struct bpf_insn *insn = env->prog->insnsi;
512
const int insn_cnt = env->prog->len;
513
int i;
514
515
for (i = 0; i < insn_cnt; i++, insn++) {
516
if (!bpf_insn_is_cond_jump(insn->code))
517
continue;
518
519
if (!aux_data[i + 1].seen)
520
ja.off = insn->off;
521
else if (!aux_data[i + 1 + insn->off].seen)
522
ja.off = 0;
523
else
524
continue;
525
526
if (bpf_prog_is_offloaded(env->prog->aux))
527
bpf_prog_offload_replace_insn(env, i, &ja);
528
529
memcpy(insn, &ja, sizeof(ja));
530
}
531
}
532
533
int bpf_opt_remove_dead_code(struct bpf_verifier_env *env)
534
{
535
struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
536
int insn_cnt = env->prog->len;
537
int i, err;
538
539
for (i = 0; i < insn_cnt; i++) {
540
int j;
541
542
j = 0;
543
while (i + j < insn_cnt && !aux_data[i + j].seen)
544
j++;
545
if (!j)
546
continue;
547
548
err = verifier_remove_insns(env, i, j);
549
if (err)
550
return err;
551
insn_cnt = env->prog->len;
552
}
553
554
return 0;
555
}
556
557
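/* Drop no-op instructions left behind by earlier rewrites: 'goto +0' jumps
* and 'may_goto +0' instructions.
*/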
int bpf_opt_remove_nops(struct bpf_verifier_env *env)
558
{
559
struct bpf_insn *insn = env->prog->insnsi;
560
int insn_cnt = env->prog->len;
561
bool is_may_goto_0, is_ja;
562
int i, err;
563
564
for (i = 0; i < insn_cnt; i++) {
565
is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
566
is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
567
568
if (!is_may_goto_0 && !is_ja)
569
continue;
570
571
err = verifier_remove_insns(env, i, 1);
572
if (err)
573
return err;
574
insn_cnt--;
575
/* Go back one insn to catch may_goto +1; may_goto +0 sequence */
576
i -= (is_may_goto_0 && i > 0) ? 2 : 1;
577
}
578
579
return 0;
580
}
581
582
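/* Insert an explicit zero-extension after every instruction that needs one
* (aux->zext_dst) when the JIT requests it, or for CMPXCHG. With
* BPF_F_TEST_RND_HI32 set, also poison the upper 32 bits of 32-bit
* definitions that do not need zero-extension, to catch code that wrongly
* relies on them being zero.
*/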
int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
583
const union bpf_attr *attr)
584
{
585
struct bpf_insn *patch;
586
/* use env->insn_buf as two independent buffers */
587
struct bpf_insn *zext_patch = env->insn_buf;
588
struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
589
struct bpf_insn_aux_data *aux = env->insn_aux_data;
590
int i, patch_len, delta = 0, len = env->prog->len;
591
struct bpf_insn *insns = env->prog->insnsi;
592
struct bpf_prog *new_prog;
593
bool rnd_hi32;
594
595
rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
596
zext_patch[1] = BPF_ZEXT_REG(0);
597
rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
598
rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
599
rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
600
for (i = 0; i < len; i++) {
601
int adj_idx = i + delta;
602
struct bpf_insn insn;
603
int load_reg;
604
605
insn = insns[adj_idx];
606
load_reg = insn_def_regno(&insn);
607
if (!aux[adj_idx].zext_dst) {
608
u8 code, class;
609
u32 imm_rnd;
610
611
if (!rnd_hi32)
612
continue;
613
614
code = insn.code;
615
class = BPF_CLASS(code);
616
if (load_reg == -1)
617
continue;
618
619
/* NOTE: arg "reg" (the fourth one) is only used for
620
* BPF_STX + SRC_OP, so it is safe to pass NULL
621
* here.
622
*/
623
if (bpf_is_reg64(&insn, load_reg, NULL, DST_OP)) {
624
if (class == BPF_LD &&
625
BPF_MODE(code) == BPF_IMM)
626
i++;
627
continue;
628
}
629
630
/* ctx load could be transformed into wider load. */
631
if (class == BPF_LDX &&
632
aux[adj_idx].ptr_type == PTR_TO_CTX)
633
continue;
634
635
imm_rnd = get_random_u32();
636
rnd_hi32_patch[0] = insn;
637
rnd_hi32_patch[1].imm = imm_rnd;
638
rnd_hi32_patch[3].dst_reg = load_reg;
639
patch = rnd_hi32_patch;
640
patch_len = 4;
641
goto apply_patch_buffer;
642
}
643
644
/* Add in a zero-extend instruction if a) the JIT has requested
645
* it or b) it's a CMPXCHG.
646
*
647
* The latter is because: BPF_CMPXCHG always loads a value into
648
* R0, therefore always zero-extends. However some archs'
649
* equivalent instruction only does this load when the
650
* comparison is successful. This detail of CMPXCHG is
651
* orthogonal to the general zero-extension behaviour of the
652
* CPU, so it's treated independently of bpf_jit_needs_zext.
653
*/
654
if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
655
continue;
656
657
/* Zero-extension is done by the caller. */
658
if (bpf_pseudo_kfunc_call(&insn))
659
continue;
660
661
if (verifier_bug_if(load_reg == -1, env,
662
"zext_dst is set, but no reg is defined"))
663
return -EFAULT;
664
665
zext_patch[0] = insn;
666
zext_patch[1].dst_reg = load_reg;
667
zext_patch[1].src_reg = load_reg;
668
patch = zext_patch;
669
patch_len = 2;
670
apply_patch_buffer:
671
new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
672
if (!new_prog)
673
return -ENOMEM;
674
env->prog = new_prog;
675
insns = new_prog->insnsi;
676
aux = env->insn_aux_data;
677
delta += patch_len - 1;
678
}
679
680
return 0;
681
}
682
683
/* convert load instructions that access fields of a context type into a
684
* sequence of instructions that access fields of the underlying structure:
685
* struct __sk_buff -> struct sk_buff
686
* struct bpf_sock_ops -> struct sock
687
*/
688
int bpf_convert_ctx_accesses(struct bpf_verifier_env *env)
689
{
690
struct bpf_subprog_info *subprogs = env->subprog_info;
691
const struct bpf_verifier_ops *ops = env->ops;
692
int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
693
const int insn_cnt = env->prog->len;
694
struct bpf_insn *epilogue_buf = env->epilogue_buf;
695
struct bpf_insn *insn_buf = env->insn_buf;
696
struct bpf_insn *insn;
697
u32 target_size, size_default, off;
698
struct bpf_prog *new_prog;
699
enum bpf_access_type type;
700
bool is_narrower_load;
701
int epilogue_idx = 0;
702
703
if (ops->gen_epilogue) {
704
epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
705
-(subprogs[0].stack_depth + 8));
706
if (epilogue_cnt >= INSN_BUF_SIZE) {
707
verifier_bug(env, "epilogue is too long");
708
return -EFAULT;
709
} else if (epilogue_cnt) {
710
/* Save the ARG_PTR_TO_CTX for the epilogue to use */
711
cnt = 0;
712
subprogs[0].stack_depth += 8;
713
insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
714
-subprogs[0].stack_depth);
715
insn_buf[cnt++] = env->prog->insnsi[0];
716
new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
717
if (!new_prog)
718
return -ENOMEM;
719
env->prog = new_prog;
720
delta += cnt - 1;
721
722
ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
723
if (ret < 0)
724
return ret;
725
}
726
}
727
728
if (ops->gen_prologue || env->seen_direct_write) {
729
if (!ops->gen_prologue) {
730
verifier_bug(env, "gen_prologue is null");
731
return -EFAULT;
732
}
733
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
734
env->prog);
735
if (cnt >= INSN_BUF_SIZE) {
736
verifier_bug(env, "prologue is too long");
737
return -EFAULT;
738
} else if (cnt) {
739
new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
740
if (!new_prog)
741
return -ENOMEM;
742
743
env->prog = new_prog;
744
delta += cnt - 1;
745
746
ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
747
if (ret < 0)
748
return ret;
749
}
750
}
751
752
if (delta)
753
WARN_ON(adjust_jmp_off(env->prog, 0, delta));
754
755
if (bpf_prog_is_offloaded(env->prog->aux))
756
return 0;
757
758
insn = env->prog->insnsi + delta;
759
760
for (i = 0; i < insn_cnt; i++, insn++) {
761
bpf_convert_ctx_access_t convert_ctx_access;
762
u8 mode;
763
764
if (env->insn_aux_data[i + delta].nospec) {
765
WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
766
struct bpf_insn *patch = insn_buf;
767
768
*patch++ = BPF_ST_NOSPEC();
769
*patch++ = *insn;
770
cnt = patch - insn_buf;
771
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
772
if (!new_prog)
773
return -ENOMEM;
774
775
delta += cnt - 1;
776
env->prog = new_prog;
777
insn = new_prog->insnsi + i + delta;
778
/* This can not be easily merged with the
779
* nospec_result-case, because an insn may require a
780
* nospec before and after itself. Therefore also do not
781
* 'continue' here but potentially apply further
782
* patching to insn. *insn should equal patch[1] now.
783
*/
784
}
785
786
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
787
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
788
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
789
insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
790
insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
791
insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
792
insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
793
type = BPF_READ;
794
} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
795
insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
796
insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
797
insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
798
insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
799
insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
800
insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
801
insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
802
type = BPF_WRITE;
803
} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
804
insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
805
insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
806
insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
807
env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
808
insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
809
env->prog->aux->num_exentries++;
810
continue;
811
} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
812
epilogue_cnt &&
813
i + delta < subprogs[1].start) {
814
/* Generate epilogue for the main prog */
815
if (epilogue_idx) {
816
/* jump back to the earlier generated epilogue */
817
insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
818
cnt = 1;
819
} else {
820
memcpy(insn_buf, epilogue_buf,
821
epilogue_cnt * sizeof(*epilogue_buf));
822
cnt = epilogue_cnt;
823
/* epilogue_idx cannot be 0. It must have at
824
* least one ctx ptr saving insn before the
825
* epilogue.
826
*/
827
epilogue_idx = i + delta;
828
}
829
goto patch_insn_buf;
830
} else {
831
continue;
832
}
833
834
if (type == BPF_WRITE &&
835
env->insn_aux_data[i + delta].nospec_result) {
836
/* nospec_result is only used to mitigate Spectre v4 and
837
* to limit verification-time for Spectre v1.
838
*/
839
struct bpf_insn *patch = insn_buf;
840
841
*patch++ = *insn;
842
*patch++ = BPF_ST_NOSPEC();
843
cnt = patch - insn_buf;
844
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
845
if (!new_prog)
846
return -ENOMEM;
847
848
delta += cnt - 1;
849
env->prog = new_prog;
850
insn = new_prog->insnsi + i + delta;
851
continue;
852
}
853
854
switch ((int)env->insn_aux_data[i + delta].ptr_type) {
855
case PTR_TO_CTX:
856
if (!ops->convert_ctx_access)
857
continue;
858
convert_ctx_access = ops->convert_ctx_access;
859
break;
860
case PTR_TO_SOCKET:
861
case PTR_TO_SOCK_COMMON:
862
convert_ctx_access = bpf_sock_convert_ctx_access;
863
break;
864
case PTR_TO_TCP_SOCK:
865
convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
866
break;
867
case PTR_TO_XDP_SOCK:
868
convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
869
break;
870
case PTR_TO_BTF_ID:
871
case PTR_TO_BTF_ID | PTR_UNTRUSTED:
872
/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
873
* PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
874
* be said once it is marked PTR_UNTRUSTED, hence we must handle
875
* any faults for loads into such types. BPF_WRITE is disallowed
876
* for this case.
877
*/
878
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
879
case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
880
if (type == BPF_READ) {
881
if (BPF_MODE(insn->code) == BPF_MEM)
882
insn->code = BPF_LDX | BPF_PROBE_MEM |
883
BPF_SIZE((insn)->code);
884
else
885
insn->code = BPF_LDX | BPF_PROBE_MEMSX |
886
BPF_SIZE((insn)->code);
887
env->prog->aux->num_exentries++;
888
}
889
continue;
890
case PTR_TO_ARENA:
891
if (BPF_MODE(insn->code) == BPF_MEMSX) {
892
if (!bpf_jit_supports_insn(insn, true)) {
893
verbose(env, "sign extending loads from arena are not supported yet\n");
894
return -EOPNOTSUPP;
895
}
896
insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
897
} else {
898
insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
899
}
900
env->prog->aux->num_exentries++;
901
continue;
902
default:
903
continue;
904
}
905
906
ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
907
size = BPF_LDST_BYTES(insn);
908
mode = BPF_MODE(insn->code);
909
910
/* If the read access is a narrower load of the field,
911
* convert to a 4/8-byte load, to minimize program type specific
912
* convert_ctx_access changes. If conversion is successful,
913
* we will apply proper mask to the result.
914
*/
915
is_narrower_load = size < ctx_field_size;
916
size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
917
off = insn->off;
918
if (is_narrower_load) {
919
u8 size_code;
920
921
if (type == BPF_WRITE) {
922
verifier_bug(env, "narrow ctx access misconfigured");
923
return -EFAULT;
924
}
925
926
size_code = BPF_H;
927
if (ctx_field_size == 4)
928
size_code = BPF_W;
929
else if (ctx_field_size == 8)
930
size_code = BPF_DW;
931
932
insn->off = off & ~(size_default - 1);
933
insn->code = BPF_LDX | BPF_MEM | size_code;
934
}
935
936
target_size = 0;
937
cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
938
&target_size);
939
if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
940
(ctx_field_size && !target_size)) {
941
verifier_bug(env, "error during ctx access conversion (%d)", cnt);
942
return -EFAULT;
943
}
944
945
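/* Illustrative example (not from this file, assuming little-endian and the
* usual bpf_ctx_narrow_access_offset() behaviour): a 1-byte read at offset 1
* of a 4-byte ctx field yields shift = 8, so the widened load is followed by
* a right shift by 8 and an AND with 0xff to extract the requested byte.
*/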
if (is_narrower_load && size < target_size) {
946
u8 shift = bpf_ctx_narrow_access_offset(
947
off, size, size_default) * 8;
948
if (shift && cnt + 1 >= INSN_BUF_SIZE) {
949
verifier_bug(env, "narrow ctx load misconfigured");
950
return -EFAULT;
951
}
952
if (ctx_field_size <= 4) {
953
if (shift)
954
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
955
insn->dst_reg,
956
shift);
957
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
958
(1 << size * 8) - 1);
959
} else {
960
if (shift)
961
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
962
insn->dst_reg,
963
shift);
964
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
965
(1ULL << size * 8) - 1);
966
}
967
}
968
if (mode == BPF_MEMSX)
969
insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
970
insn->dst_reg, insn->dst_reg,
971
size * 8, 0);
972
973
patch_insn_buf:
974
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
975
if (!new_prog)
976
return -ENOMEM;
977
978
delta += cnt - 1;
979
980
/* keep walking new program and skip insns we just inserted */
981
env->prog = new_prog;
982
insn = new_prog->insnsi + i + delta;
983
}
984
985
return 0;
986
}
987
988
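/* Snapshot helpers: bpf_jit_subprogs() saves subprog start offsets (and insn
* aux data) before blinding constants so they can be rolled back if JIT'ing
* fails and the interpreter fallback is used.
*/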
static u32 *bpf_dup_subprog_starts(struct bpf_verifier_env *env)
989
{
990
u32 *starts = NULL;
991
992
starts = kvmalloc_objs(u32, env->subprog_cnt, GFP_KERNEL_ACCOUNT);
993
if (starts) {
994
for (int i = 0; i < env->subprog_cnt; i++)
995
starts[i] = env->subprog_info[i].start;
996
}
997
return starts;
998
}
999
1000
static void bpf_restore_subprog_starts(struct bpf_verifier_env *env, u32 *orig_starts)
1001
{
1002
for (int i = 0; i < env->subprog_cnt; i++)
1003
env->subprog_info[i].start = orig_starts[i];
1004
/* restore the start of fake 'exit' subprog as well */
1005
env->subprog_info[env->subprog_cnt].start = env->prog->len;
1006
}
1007
1008
struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env)
1009
{
1010
size_t size;
1011
void *new_aux;
1012
1013
size = array_size(sizeof(struct bpf_insn_aux_data), env->prog->len);
1014
new_aux = __vmalloc(size, GFP_KERNEL_ACCOUNT);
1015
if (new_aux)
1016
memcpy(new_aux, env->insn_aux_data, size);
1017
return new_aux;
1018
}
1019
1020
void bpf_restore_insn_aux_data(struct bpf_verifier_env *env,
1021
struct bpf_insn_aux_data *orig_insn_aux)
1022
{
1023
/* the expanded elements are zero-filled, so no special handling is required */
1024
vfree(env->insn_aux_data);
1025
env->insn_aux_data = orig_insn_aux;
1026
}
1027
1028
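/* Split the program at subprog boundaries into separate bpf_prog objects, JIT
* each of them, patch all bpf-to-bpf call sites with the real JITed addresses,
* then run a final JIT pass so the calls become direct.
*/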
static int jit_subprogs(struct bpf_verifier_env *env)
1029
{
1030
struct bpf_prog *prog = env->prog, **func, *tmp;
1031
int i, j, subprog_start, subprog_end = 0, len, subprog;
1032
struct bpf_map *map_ptr;
1033
struct bpf_insn *insn;
1034
void *old_bpf_func;
1035
int err, num_exentries;
1036
1037
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1038
if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
1039
continue;
1040
1041
/* Upon error here we cannot fall back to interpreter but
1042
* need a hard reject of the program. Thus -EFAULT is
1043
* propagated in any case.
1044
*/
1045
subprog = bpf_find_subprog(env, i + insn->imm + 1);
1046
if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
1047
i + insn->imm + 1))
1048
return -EFAULT;
1049
/* temporarily remember subprog id inside insn instead of
1050
* aux_data, since next loop will split up all insns into funcs
1051
*/
1052
insn->off = subprog;
1053
/* remember original imm in case JIT fails and fallback
1054
* to interpreter will be needed
1055
*/
1056
env->insn_aux_data[i].call_imm = insn->imm;
1057
/* point imm to __bpf_call_base+1 from JITs point of view */
1058
insn->imm = 1;
1059
if (bpf_pseudo_func(insn)) {
1060
#if defined(MODULES_VADDR)
1061
u64 addr = MODULES_VADDR;
1062
#else
1063
u64 addr = VMALLOC_START;
1064
#endif
1065
/* jit (e.g. x86_64) may emit fewer instructions
1066
* if it learns a u32 imm is the same as a u64 imm.
1067
* Set close enough to possible prog address.
1068
*/
1069
insn[0].imm = (u32)addr;
1070
insn[1].imm = addr >> 32;
1071
}
1072
}
1073
1074
err = bpf_prog_alloc_jited_linfo(prog);
1075
if (err)
1076
goto out_undo_insn;
1077
1078
err = -ENOMEM;
1079
func = kzalloc_objs(prog, env->subprog_cnt);
1080
if (!func)
1081
goto out_undo_insn;
1082
1083
for (i = 0; i < env->subprog_cnt; i++) {
1084
subprog_start = subprog_end;
1085
subprog_end = env->subprog_info[i + 1].start;
1086
1087
len = subprog_end - subprog_start;
1088
/* bpf_prog_run() doesn't call subprogs directly,
1089
* hence main prog stats include the runtime of subprogs.
1090
* subprogs don't have IDs and are not reachable via prog_get_next_id
1091
* func[i]->stats will never be accessed and stays NULL
1092
*/
1093
func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
1094
if (!func[i])
1095
goto out_free;
1096
memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
1097
len * sizeof(struct bpf_insn));
1098
func[i]->type = prog->type;
1099
func[i]->len = len;
1100
if (bpf_prog_calc_tag(func[i]))
1101
goto out_free;
1102
func[i]->is_func = 1;
1103
func[i]->sleepable = prog->sleepable;
1104
func[i]->blinded = prog->blinded;
1105
func[i]->aux->func_idx = i;
1106
/* Below members will be freed only at prog->aux */
1107
func[i]->aux->btf = prog->aux->btf;
1108
func[i]->aux->subprog_start = subprog_start;
1109
func[i]->aux->func_info = prog->aux->func_info;
1110
func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
1111
func[i]->aux->poke_tab = prog->aux->poke_tab;
1112
func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
1113
func[i]->aux->main_prog_aux = prog->aux;
1114
1115
for (j = 0; j < prog->aux->size_poke_tab; j++) {
1116
struct bpf_jit_poke_descriptor *poke;
1117
1118
poke = &prog->aux->poke_tab[j];
1119
if (poke->insn_idx < subprog_end &&
1120
poke->insn_idx >= subprog_start)
1121
poke->aux = func[i]->aux;
1122
}
1123
1124
func[i]->aux->name[0] = 'F';
1125
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1126
if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
1127
func[i]->aux->jits_use_priv_stack = true;
1128
1129
func[i]->jit_requested = 1;
1130
func[i]->blinding_requested = prog->blinding_requested;
1131
func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
1132
func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
1133
func[i]->aux->linfo = prog->aux->linfo;
1134
func[i]->aux->nr_linfo = prog->aux->nr_linfo;
1135
func[i]->aux->jited_linfo = prog->aux->jited_linfo;
1136
func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
1137
func[i]->aux->arena = prog->aux->arena;
1138
func[i]->aux->used_maps = env->used_maps;
1139
func[i]->aux->used_map_cnt = env->used_map_cnt;
1140
num_exentries = 0;
1141
insn = func[i]->insnsi;
1142
for (j = 0; j < func[i]->len; j++, insn++) {
1143
if (BPF_CLASS(insn->code) == BPF_LDX &&
1144
(BPF_MODE(insn->code) == BPF_PROBE_MEM ||
1145
BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
1146
BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
1147
BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
1148
num_exentries++;
1149
if ((BPF_CLASS(insn->code) == BPF_STX ||
1150
BPF_CLASS(insn->code) == BPF_ST) &&
1151
BPF_MODE(insn->code) == BPF_PROBE_MEM32)
1152
num_exentries++;
1153
if (BPF_CLASS(insn->code) == BPF_STX &&
1154
BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
1155
num_exentries++;
1156
}
1157
func[i]->aux->num_exentries = num_exentries;
1158
func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
1159
func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
1160
func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
1161
func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
1162
func[i]->aux->token = prog->aux->token;
1163
if (!i)
1164
func[i]->aux->exception_boundary = env->seen_exception;
1165
func[i] = bpf_int_jit_compile(env, func[i]);
1166
if (!func[i]->jited) {
1167
err = -ENOTSUPP;
1168
goto out_free;
1169
}
1170
cond_resched();
1171
}
1172
1173
/* at this point all bpf functions were successfully JITed
1174
* now populate all bpf_calls with correct addresses and
1175
* run last pass of JIT
1176
*/
1177
for (i = 0; i < env->subprog_cnt; i++) {
1178
insn = func[i]->insnsi;
1179
for (j = 0; j < func[i]->len; j++, insn++) {
1180
if (bpf_pseudo_func(insn)) {
1181
subprog = insn->off;
1182
insn[0].imm = (u32)(long)func[subprog]->bpf_func;
1183
insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
1184
continue;
1185
}
1186
if (!bpf_pseudo_call(insn))
1187
continue;
1188
subprog = insn->off;
1189
insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
1190
}
1191
1192
/* we use the aux data to keep a list of the start addresses
1193
* of the JITed images for each function in the program
1194
*
1195
* for some architectures, such as powerpc64, the imm field
1196
* might not be large enough to hold the offset of the start
1197
* address of the callee's JITed image from __bpf_call_base
1198
*
1199
* in such cases, we can lookup the start address of a callee
1200
* by using its subprog id, available from the off field of
1201
* the call instruction, as an index for this list
1202
*/
1203
func[i]->aux->func = func;
1204
func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
1205
func[i]->aux->real_func_cnt = env->subprog_cnt;
1206
}
1207
for (i = 0; i < env->subprog_cnt; i++) {
1208
old_bpf_func = func[i]->bpf_func;
1209
tmp = bpf_int_jit_compile(env, func[i]);
1210
if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
1211
verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
1212
err = -ENOTSUPP;
1213
goto out_free;
1214
}
1215
cond_resched();
1216
}
1217
1218
/*
1219
* Cleanup func[i]->aux fields which aren't required
1220
* or can become invalid in future
1221
*/
1222
for (i = 0; i < env->subprog_cnt; i++) {
1223
func[i]->aux->used_maps = NULL;
1224
func[i]->aux->used_map_cnt = 0;
1225
}
1226
1227
/* finally lock prog and jit images for all functions and
1228
* populate kallsyms. Begin at the first subprogram, since
1229
* bpf_prog_load will add the kallsyms for the main program.
1230
*/
1231
for (i = 1; i < env->subprog_cnt; i++) {
1232
err = bpf_prog_lock_ro(func[i]);
1233
if (err)
1234
goto out_free;
1235
}
1236
1237
for (i = 1; i < env->subprog_cnt; i++)
1238
bpf_prog_kallsyms_add(func[i]);
1239
1240
/* Last step: make now unused interpreter insns from main
1241
* prog consistent for later dump requests, so they can
1242
* later look the same as if they were interpreted only.
1243
*/
1244
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1245
if (bpf_pseudo_func(insn)) {
1246
insn[0].imm = env->insn_aux_data[i].call_imm;
1247
insn[1].imm = insn->off;
1248
insn->off = 0;
1249
continue;
1250
}
1251
if (!bpf_pseudo_call(insn))
1252
continue;
1253
insn->off = env->insn_aux_data[i].call_imm;
1254
subprog = bpf_find_subprog(env, i + insn->off + 1);
1255
insn->imm = subprog;
1256
}
1257
1258
prog->jited = 1;
1259
prog->bpf_func = func[0]->bpf_func;
1260
prog->jited_len = func[0]->jited_len;
1261
prog->aux->extable = func[0]->aux->extable;
1262
prog->aux->num_exentries = func[0]->aux->num_exentries;
1263
prog->aux->func = func;
1264
prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
1265
prog->aux->real_func_cnt = env->subprog_cnt;
1266
prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
1267
prog->aux->exception_boundary = func[0]->aux->exception_boundary;
1268
bpf_prog_jit_attempt_done(prog);
1269
return 0;
1270
out_free:
1271
/* We failed JIT'ing, so at this point we need to unregister poke
1272
* descriptors from subprogs, so that the kernel is not attempting to
1273
* patch it anymore as we're freeing the subprog JIT memory.
1274
*/
1275
for (i = 0; i < prog->aux->size_poke_tab; i++) {
1276
map_ptr = prog->aux->poke_tab[i].tail_call.map;
1277
map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
1278
}
1279
/* At this point we're guaranteed that poke descriptors are not
1280
* live anymore. We can just unlink its descriptor table as it's
1281
* released with the main prog.
1282
*/
1283
for (i = 0; i < env->subprog_cnt; i++) {
1284
if (!func[i])
1285
continue;
1286
func[i]->aux->poke_tab = NULL;
1287
bpf_jit_free(func[i]);
1288
}
1289
kfree(func);
1290
out_undo_insn:
1291
bpf_prog_jit_attempt_done(prog);
1292
return err;
1293
}
1294
1295
int bpf_jit_subprogs(struct bpf_verifier_env *env)
1296
{
1297
int err, i;
1298
bool blinded = false;
1299
struct bpf_insn *insn;
1300
struct bpf_prog *prog, *orig_prog;
1301
struct bpf_insn_aux_data *orig_insn_aux;
1302
u32 *orig_subprog_starts;
1303
1304
if (env->subprog_cnt <= 1)
1305
return 0;
1306
1307
prog = orig_prog = env->prog;
1308
if (bpf_prog_need_blind(prog)) {
1309
orig_insn_aux = bpf_dup_insn_aux_data(env);
1310
if (!orig_insn_aux) {
1311
err = -ENOMEM;
1312
goto out_cleanup;
1313
}
1314
orig_subprog_starts = bpf_dup_subprog_starts(env);
1315
if (!orig_subprog_starts) {
1316
vfree(orig_insn_aux);
1317
err = -ENOMEM;
1318
goto out_cleanup;
1319
}
1320
prog = bpf_jit_blind_constants(env, prog);
1321
if (IS_ERR(prog)) {
1322
err = -ENOMEM;
1323
prog = orig_prog;
1324
goto out_restore;
1325
}
1326
blinded = true;
1327
}
1328
1329
err = jit_subprogs(env);
1330
if (err)
1331
goto out_jit_err;
1332
1333
if (blinded) {
1334
bpf_jit_prog_release_other(prog, orig_prog);
1335
kvfree(orig_subprog_starts);
1336
vfree(orig_insn_aux);
1337
}
1338
1339
return 0;
1340
1341
out_jit_err:
1342
if (blinded) {
1343
bpf_jit_prog_release_other(orig_prog, prog);
1344
/* roll back to the clean original prog */
1345
prog = env->prog = orig_prog;
1346
goto out_restore;
1347
} else {
1348
if (err != -EFAULT) {
1349
/*
1350
* We will fall back to interpreter mode when err is not -EFAULT. Before
1351
* that, insn->off and insn->imm should be restored to their original
1352
* values since they were modified by jit_subprogs.
1353
*/
1354
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1355
if (!bpf_pseudo_call(insn))
1356
continue;
1357
insn->off = 0;
1358
insn->imm = env->insn_aux_data[i].call_imm;
1359
}
1360
}
1361
goto out_cleanup;
1362
}
1363
1364
out_restore:
1365
bpf_restore_subprog_starts(env, orig_subprog_starts);
1366
bpf_restore_insn_aux_data(env, orig_insn_aux);
1367
kvfree(orig_subprog_starts);
1368
out_cleanup:
1369
/* cleanup main prog to be interpreted */
1370
prog->jit_requested = 0;
1371
prog->blinding_requested = 0;
1372
return err;
1373
}
1374
1375
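/* Try to JIT all subprograms; if that is not possible (and not a hard error),
* fall back to the interpreter by patching bpf-to-bpf calls with the callee's
* stack depth via bpf_patch_call_args(). Programs using kfuncs, callbacks or
* tail calls combined with bpf-to-bpf calls are rejected in that case.
*/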
int bpf_fixup_call_args(struct bpf_verifier_env *env)
1376
{
1377
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1378
struct bpf_prog *prog = env->prog;
1379
struct bpf_insn *insn = prog->insnsi;
1380
bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
1381
int i, depth;
1382
#endif
1383
int err = 0;
1384
1385
if (env->prog->jit_requested &&
1386
!bpf_prog_is_offloaded(env->prog->aux)) {
1387
err = bpf_jit_subprogs(env);
1388
if (err == 0)
1389
return 0;
1390
if (err == -EFAULT)
1391
return err;
1392
}
1393
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1394
if (has_kfunc_call) {
1395
verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
1396
return -EINVAL;
1397
}
1398
if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
1399
/* When JIT fails the progs with bpf2bpf calls and tail_calls
1400
* have to be rejected, since interpreter doesn't support them yet.
1401
*/
1402
verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
1403
return -EINVAL;
1404
}
1405
for (i = 0; i < prog->len; i++, insn++) {
1406
if (bpf_pseudo_func(insn)) {
1407
/* When JIT fails the progs with callback calls
1408
* have to be rejected, since interpreter doesn't support them yet.
1409
*/
1410
verbose(env, "callbacks are not allowed in non-JITed programs\n");
1411
return -EINVAL;
1412
}
1413
1414
if (!bpf_pseudo_call(insn))
1415
continue;
1416
depth = get_callee_stack_depth(env, insn, i);
1417
if (depth < 0)
1418
return depth;
1419
bpf_patch_call_args(insn, depth);
1420
}
1421
err = 0;
1422
#endif
1423
return err;
1424
}
1425
1426
1427
/* The function requires that the first instruction in 'patch' is insnsi[prog->len - 1] */
1428
static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
1429
{
1430
struct bpf_subprog_info *info = env->subprog_info;
1431
int cnt = env->subprog_cnt;
1432
struct bpf_prog *prog;
1433
1434
/* We only reserve one slot for hidden subprogs in subprog_info. */
1435
if (env->hidden_subprog_cnt) {
1436
verifier_bug(env, "only one hidden subprog supported");
1437
return -EFAULT;
1438
}
1439
/* We're not patching any existing instruction, just appending the new
1440
* ones for the hidden subprog. Hence all of the adjustment operations
1441
* in bpf_patch_insn_data are no-ops.
1442
*/
1443
prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
1444
if (!prog)
1445
return -ENOMEM;
1446
env->prog = prog;
1447
info[cnt + 1].start = info[cnt].start;
1448
info[cnt].start = prog->len - len + 1;
1449
env->subprog_cnt++;
1450
env->hidden_subprog_cnt++;
1451
return 0;
1452
}
1453
1454
/* Do various post-verification rewrites in a single program pass.
1455
* These rewrites simplify JIT and interpreter implementations.
1456
*/
1457
int bpf_do_misc_fixups(struct bpf_verifier_env *env)
1458
{
1459
struct bpf_prog *prog = env->prog;
1460
enum bpf_attach_type eatype = prog->expected_attach_type;
1461
enum bpf_prog_type prog_type = resolve_prog_type(prog);
1462
struct bpf_insn *insn = prog->insnsi;
1463
const struct bpf_func_proto *fn;
1464
const int insn_cnt = prog->len;
1465
const struct bpf_map_ops *ops;
1466
struct bpf_insn_aux_data *aux;
1467
struct bpf_insn *insn_buf = env->insn_buf;
1468
struct bpf_prog *new_prog;
1469
struct bpf_map *map_ptr;
1470
int i, ret, cnt, delta = 0, cur_subprog = 0;
1471
struct bpf_subprog_info *subprogs = env->subprog_info;
1472
u16 stack_depth = subprogs[cur_subprog].stack_depth;
1473
u16 stack_depth_extra = 0;
1474
1475
if (env->seen_exception && !env->exception_callback_subprog) {
1476
struct bpf_insn *patch = insn_buf;
1477
1478
*patch++ = env->prog->insnsi[insn_cnt - 1];
1479
*patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
1480
*patch++ = BPF_EXIT_INSN();
1481
ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
1482
if (ret < 0)
1483
return ret;
1484
prog = env->prog;
1485
insn = prog->insnsi;
1486
1487
env->exception_callback_subprog = env->subprog_cnt - 1;
1488
/* Don't update insn_cnt, as add_hidden_subprog always appends insns */
1489
bpf_mark_subprog_exc_cb(env, env->exception_callback_subprog);
1490
}
1491
1492
for (i = 0; i < insn_cnt;) {
1493
if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
1494
if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
1495
(((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
1496
/* convert to 32-bit mov that clears upper 32-bit */
1497
insn->code = BPF_ALU | BPF_MOV | BPF_X;
1498
/* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
1499
insn->off = 0;
1500
insn->imm = 0;
1501
} /* cast from as(0) to as(1) should be handled by JIT */
1502
goto next_insn;
1503
}
1504
1505
if (env->insn_aux_data[i + delta].needs_zext)
1506
/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
1507
insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
1508
1509
/* Make sdiv/smod divide-by-minus-one exceptions impossible. */
1510
if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
1511
insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
1512
insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
1513
insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
1514
insn->off == 1 && insn->imm == -1) {
1515
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1516
bool isdiv = BPF_OP(insn->code) == BPF_DIV;
1517
struct bpf_insn *patch = insn_buf;
1518
1519
if (isdiv)
1520
*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1521
BPF_NEG | BPF_K, insn->dst_reg,
1522
0, 0, 0);
1523
else
1524
*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
1525
1526
cnt = patch - insn_buf;
1527
1528
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1529
if (!new_prog)
1530
return -ENOMEM;
1531
1532
delta += cnt - 1;
1533
env->prog = prog = new_prog;
1534
insn = new_prog->insnsi + i + delta;
1535
goto next_insn;
1536
}
1537
1538
/* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
1539
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
1540
insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
1541
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
1542
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
1543
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1544
bool isdiv = BPF_OP(insn->code) == BPF_DIV;
1545
bool is_sdiv = isdiv && insn->off == 1;
1546
bool is_smod = !isdiv && insn->off == 1;
1547
struct bpf_insn *patch = insn_buf;
1548
1549
if (is_sdiv) {
1550
/* [R,W]x sdiv 0 -> 0
1551
* LLONG_MIN sdiv -1 -> LLONG_MIN
1552
* INT_MIN sdiv -1 -> INT_MIN
1553
*/
1554
*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1555
*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1556
BPF_ADD | BPF_K, BPF_REG_AX,
1557
0, 0, 1);
1558
*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1559
BPF_JGT | BPF_K, BPF_REG_AX,
1560
0, 4, 1);
1561
*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1562
BPF_JEQ | BPF_K, BPF_REG_AX,
1563
0, 1, 0);
1564
*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1565
BPF_MOV | BPF_K, insn->dst_reg,
1566
0, 0, 0);
1567
/* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
1568
*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1569
BPF_NEG | BPF_K, insn->dst_reg,
1570
0, 0, 0);
1571
*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1572
*patch++ = *insn;
1573
cnt = patch - insn_buf;
1574
} else if (is_smod) {
1575
/* [R,W]x mod 0 -> [R,W]x */
1576
/* [R,W]x mod -1 -> 0 */
1577
*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1578
*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1579
BPF_ADD | BPF_K, BPF_REG_AX,
1580
0, 0, 1);
1581
*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1582
BPF_JGT | BPF_K, BPF_REG_AX,
1583
0, 3, 1);
1584
*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1585
BPF_JEQ | BPF_K, BPF_REG_AX,
1586
0, 3 + (is64 ? 0 : 1), 1);
1587
*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
1588
*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1589
*patch++ = *insn;
1590
1591
if (!is64) {
1592
*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1593
*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
1594
}
1595
cnt = patch - insn_buf;
1596
} else if (isdiv) {
1597
/* [R,W]x div 0 -> 0 */
1598
*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1599
BPF_JNE | BPF_K, insn->src_reg,
1600
0, 2, 0);
1601
*patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
1602
*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1603
*patch++ = *insn;
1604
cnt = patch - insn_buf;
1605
} else {
1606
/* [R,W]x mod 0 -> [R,W]x */
1607
*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1608
BPF_JEQ | BPF_K, insn->src_reg,
1609
0, 1 + (is64 ? 0 : 1), 0);
1610
*patch++ = *insn;
1611
1612
if (!is64) {
1613
*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1614
*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
1615
}
1616
cnt = patch - insn_buf;
1617
}
1618
1619
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1620
if (!new_prog)
1621
return -ENOMEM;
1622
1623
delta += cnt - 1;
1624
env->prog = prog = new_prog;
1625
insn = new_prog->insnsi + i + delta;
1626
goto next_insn;
1627
}
1628
1629
/* Make it impossible to de-reference a userspace address */
1630
if (BPF_CLASS(insn->code) == BPF_LDX &&
1631
(BPF_MODE(insn->code) == BPF_PROBE_MEM ||
1632
BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
1633
struct bpf_insn *patch = insn_buf;
1634
u64 uaddress_limit = bpf_arch_uaddress_limit();
1635
1636
if (!uaddress_limit)
1637
goto next_insn;
1638
1639
*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1640
if (insn->off)
1641
*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
1642
*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
1643
*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
1644
*patch++ = *insn;
1645
*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1646
*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
1647
1648
cnt = patch - insn_buf;
1649
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1650
if (!new_prog)
1651
return -ENOMEM;
1652
1653
delta += cnt - 1;
1654
env->prog = prog = new_prog;
1655
insn = new_prog->insnsi + i + delta;
1656
goto next_insn;
1657
}
1658
1659
/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
1660
if (BPF_CLASS(insn->code) == BPF_LD &&
1661
(BPF_MODE(insn->code) == BPF_ABS ||
1662
BPF_MODE(insn->code) == BPF_IND)) {
1663
cnt = env->ops->gen_ld_abs(insn, insn_buf);
1664
if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
1665
verifier_bug(env, "%d insns generated for ld_abs", cnt);
1666
return -EFAULT;
1667
}
1668
1669
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1670
if (!new_prog)
1671
return -ENOMEM;
1672
1673
delta += cnt - 1;
1674
env->prog = prog = new_prog;
1675
insn = new_prog->insnsi + i + delta;
1676
goto next_insn;
1677
}
1678
1679
/* Rewrite pointer arithmetic to mitigate speculation attacks. */
1680
if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
1681
insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
1682
const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
1683
const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
1684
struct bpf_insn *patch = insn_buf;
1685
bool issrc, isneg, isimm;
1686
u32 off_reg;
1687
1688
aux = &env->insn_aux_data[i + delta];
1689
if (!aux->alu_state ||
1690
aux->alu_state == BPF_ALU_NON_POINTER)
1691
goto next_insn;
1692
1693
isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
1694
issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
1695
BPF_ALU_SANITIZE_SRC;
1696
isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
1697
1698
off_reg = issrc ? insn->src_reg : insn->dst_reg;
1699
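/* The emitted sequence below clamps the variable offset: for a non-immediate
* offset, BPF_REG_AX ends up equal to off_reg when it is within
* aux->alu_limit and 0 otherwise; for a known immediate, AX is loaded
* directly with the verifier-computed limit. The pointer ALU insn is then
* rewritten to use AX as its source, keeping speculative pointer arithmetic
* in bounds.
*/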
if (isimm) {
1700
*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
1701
} else {
1702
if (isneg)
1703
*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
1704
*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
1705
*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
1706
*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
1707
*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
1708
*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
1709
*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
1710
}
1711
if (!issrc)
1712
*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
1713
insn->src_reg = BPF_REG_AX;
1714
if (isneg)
1715
insn->code = insn->code == code_add ?
1716
code_sub : code_add;
1717
*patch++ = *insn;
1718
if (issrc && isneg && !isimm)
1719
*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
1720
cnt = patch - insn_buf;
1721
1722
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1723
if (!new_prog)
1724
return -ENOMEM;
1725
1726
delta += cnt - 1;
1727
env->prog = prog = new_prog;
1728
insn = new_prog->insnsi + i + delta;
1729
goto next_insn;
1730
}
1731
1732
if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
1733
int stack_off_cnt = -stack_depth - 16;
1734
1735
/*
1736
* Two 8 byte slots, depth-16 stores the count, and
1737
* depth-8 stores the start timestamp of the loop.
1738
*
1739
* The starting value of count is BPF_MAX_TIMED_LOOPS
1740
* (0xffff). Every iteration loads it and subs it by 1,
1741
* until the value becomes 0 in AX (thus, 1 in stack),
1742
* after which we call arch_bpf_timed_may_goto, which
1743
* either sets AX to 0xffff to keep looping, or to 0
1744
* upon timeout. AX is then stored into the stack. In
1745
* the next iteration, we either see 0 and break out, or
1746
* continue iterating until the next time value is 0
1747
* after subtraction, rinse and repeat.
1748
*/
1749
stack_depth_extra = 16;
1750
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
1751
if (insn->off >= 0)
1752
insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
1753
else
1754
insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
1755
insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
1756
insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
1757
/*
1758
* AX is used as an argument to pass in stack_off_cnt
1759
* (to add to r10/fp), and also as the return value of
1760
* the call to arch_bpf_timed_may_goto.
1761
*/
1762
insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
1763
insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
1764
insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
1765
cnt = 7;
1766
1767
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1768
if (!new_prog)
1769
return -ENOMEM;
1770
1771
delta += cnt - 1;
1772
env->prog = prog = new_prog;
1773
insn = new_prog->insnsi + i + delta;
1774
goto next_insn;
1775
} else if (bpf_is_may_goto_insn(insn)) {
1776
int stack_off = -stack_depth - 8;
1777
1778
stack_depth_extra = 8;
1779
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
1780
if (insn->off >= 0)
1781
insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
1782
else
1783
insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
1784
insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
1785
insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
1786
cnt = 4;
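/* Same scheme without the timestamp: a single counter slot, initialized
 * to BPF_MAX_LOOPS in the subprog prologue emitted further below, is
 * decremented once per iteration and the jump to the may_goto target is
 * taken once it reaches zero.
 */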
1787
1788
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1789
if (!new_prog)
1790
return -ENOMEM;
1791
1792
delta += cnt - 1;
1793
env->prog = prog = new_prog;
1794
insn = new_prog->insnsi + i + delta;
1795
goto next_insn;
1796
}
1797
1798
if (insn->code != (BPF_JMP | BPF_CALL))
1799
goto next_insn;
1800
if (insn->src_reg == BPF_PSEUDO_CALL)
1801
goto next_insn;
1802
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1803
ret = bpf_fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
1804
if (ret)
1805
return ret;
1806
if (cnt == 0)
1807
goto next_insn;
1808
1809
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1810
if (!new_prog)
1811
return -ENOMEM;
1812
1813
delta += cnt - 1;
1814
env->prog = prog = new_prog;
1815
insn = new_prog->insnsi + i + delta;
1816
goto next_insn;
1817
}
1818
1819
/* Skip inlining the helper call if the JIT does it. */
1820
if (bpf_jit_inlines_helper_call(insn->imm))
1821
goto next_insn;
1822
1823
if (insn->imm == BPF_FUNC_get_route_realm)
1824
prog->dst_needed = 1;
1825
if (insn->imm == BPF_FUNC_get_prandom_u32)
1826
bpf_user_rnd_init_once();
1827
if (insn->imm == BPF_FUNC_override_return)
1828
prog->kprobe_override = 1;
1829
if (insn->imm == BPF_FUNC_tail_call) {
1830
/* If we tail call into other programs, we
1831
* cannot make any assumptions since they can
1832
* be replaced dynamically during runtime in
1833
* the program array.
1834
*/
1835
prog->cb_access = 1;
1836
if (!bpf_allow_tail_call_in_subprogs(env))
1837
prog->aux->stack_depth = MAX_BPF_STACK;
1838
prog->aux->max_pkt_offset = MAX_PACKET_OFF;
1839
1840
/* mark bpf_tail_call as different opcode to avoid
1841
* conditional branch in the interpreter for every normal
1842
* call and to prevent accidental JITing by JIT compiler
1843
* that doesn't support bpf_tail_call yet
1844
*/
1845
insn->imm = 0;
1846
insn->code = BPF_JMP | BPF_TAIL_CALL;
1847
1848
aux = &env->insn_aux_data[i + delta];
1849
if (env->bpf_capable && !prog->blinding_requested &&
1850
prog->jit_requested &&
1851
!bpf_map_key_poisoned(aux) &&
1852
!bpf_map_ptr_poisoned(aux) &&
1853
!bpf_map_ptr_unpriv(aux)) {
1854
struct bpf_jit_poke_descriptor desc = {
1855
.reason = BPF_POKE_REASON_TAIL_CALL,
1856
.tail_call.map = aux->map_ptr_state.map_ptr,
1857
.tail_call.key = bpf_map_key_immediate(aux),
1858
.insn_idx = i + delta,
1859
};
1860
1861
ret = bpf_jit_add_poke_descriptor(prog, &desc);
1862
if (ret < 0) {
1863
verbose(env, "adding tail call poke descriptor failed\n");
1864
return ret;
1865
}
1866
1867
insn->imm = ret + 1;
1868
goto next_insn;
1869
}
1870
1871
if (!bpf_map_ptr_unpriv(aux))
1872
goto next_insn;
1873
1874
/* instead of changing every JIT dealing with tail_call
1875
* emit two extra insns:
1876
* if (index >= max_entries) goto out;
1877
* index &= array->index_mask;
1878
* to avoid out-of-bounds cpu speculation
1879
*/
1880
if (bpf_map_ptr_poisoned(aux)) {
1881
verbose(env, "tail_call abusing map_ptr\n");
1882
return -EINVAL;
1883
}
1884
1885
map_ptr = aux->map_ptr_state.map_ptr;
1886
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
1887
map_ptr->max_entries, 2);
1888
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
1889
container_of(map_ptr,
1890
struct bpf_array,
1891
map)->index_mask);
1892
insn_buf[2] = *insn;
1893
cnt = 3;
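/* index_mask is max_entries rounded up to the next power of two, minus
 * one. For arrays created without the spec_v1 bypass (the unprivileged
 * case this masking is emitted for), arraymap.c sizes the element area
 * to index_mask + 1 entries, so even a speculatively out-of-bounds index
 * lands on a valid slot after the AND.
 */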
1894
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1895
if (!new_prog)
1896
return -ENOMEM;
1897
1898
delta += cnt - 1;
1899
env->prog = prog = new_prog;
1900
insn = new_prog->insnsi + i + delta;
1901
goto next_insn;
1902
}
1903
1904
if (insn->imm == BPF_FUNC_timer_set_callback) {
1905
/* The verifier will process callback_fn as many times as necessary
1906
* with different maps and the register states prepared by
1907
* set_timer_callback_state will be accurate.
1908
*
1909
* The following use case is valid:
1910
* map1 is shared by prog1, prog2, prog3.
1911
* prog1 calls bpf_timer_init for some map1 elements
1912
* prog2 calls bpf_timer_set_callback for some map1 elements.
1913
* Those that were not bpf_timer_init-ed will return -EINVAL.
1914
* prog3 calls bpf_timer_start for some map1 elements.
1915
* Those that were not both bpf_timer_init-ed and
1916
* bpf_timer_set_callback-ed will return -EINVAL.
1917
*/
1918
struct bpf_insn ld_addrs[2] = {
1919
BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
1920
};
1921
1922
insn_buf[0] = ld_addrs[0];
1923
insn_buf[1] = ld_addrs[1];
1924
insn_buf[2] = *insn;
1925
cnt = 3;
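/* BPF_LD_IMM64() occupies two struct bpf_insn slots, hence the two-entry
 * ld_addrs[] and cnt == 3. The patch loads prog->aux into R3 so that
 * bpf_timer_set_callback() receives the calling program's aux pointer as
 * its hidden third argument.
 */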
1926
1927
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1928
if (!new_prog)
1929
return -ENOMEM;
1930
1931
delta += cnt - 1;
1932
env->prog = prog = new_prog;
1933
insn = new_prog->insnsi + i + delta;
1934
goto patch_call_imm;
1935
}
1936
1937
/* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
1938
if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
1939
/* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
1940
* bpf_mem_alloc() returns a ptr to the percpu data ptr.
1941
*/
1942
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
1943
insn_buf[1] = *insn;
1944
cnt = 2;
1945
1946
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1947
if (!new_prog)
1948
return -ENOMEM;
1949
1950
delta += cnt - 1;
1951
env->prog = prog = new_prog;
1952
insn = new_prog->insnsi + i + delta;
1953
goto patch_call_imm;
1954
}
1955
1956
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
1957
* and other inlining handlers are currently limited to 64 bit
1958
* only.
1959
*/
1960
if (prog->jit_requested && BITS_PER_LONG == 64 &&
1961
(insn->imm == BPF_FUNC_map_lookup_elem ||
1962
insn->imm == BPF_FUNC_map_update_elem ||
1963
insn->imm == BPF_FUNC_map_delete_elem ||
1964
insn->imm == BPF_FUNC_map_push_elem ||
1965
insn->imm == BPF_FUNC_map_pop_elem ||
1966
insn->imm == BPF_FUNC_map_peek_elem ||
1967
insn->imm == BPF_FUNC_redirect_map ||
1968
insn->imm == BPF_FUNC_for_each_map_elem ||
1969
insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
1970
aux = &env->insn_aux_data[i + delta];
1971
if (bpf_map_ptr_poisoned(aux))
1972
goto patch_call_imm;
1973
1974
map_ptr = aux->map_ptr_state.map_ptr;
1975
ops = map_ptr->ops;
1976
if (insn->imm == BPF_FUNC_map_lookup_elem &&
1977
ops->map_gen_lookup) {
1978
cnt = ops->map_gen_lookup(map_ptr, insn_buf);
1979
if (cnt == -EOPNOTSUPP)
1980
goto patch_map_ops_generic;
1981
if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
1982
verifier_bug(env, "%d insns generated for map lookup", cnt);
1983
return -EFAULT;
1984
}
1985
1986
new_prog = bpf_patch_insn_data(env, i + delta,
1987
insn_buf, cnt);
1988
if (!new_prog)
1989
return -ENOMEM;
1990
1991
delta += cnt - 1;
1992
env->prog = prog = new_prog;
1993
insn = new_prog->insnsi + i + delta;
1994
goto next_insn;
1995
}
1996
1997
BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
1998
(void *(*)(struct bpf_map *map, void *key))NULL));
1999
BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
2000
(long (*)(struct bpf_map *map, void *key))NULL));
2001
BUILD_BUG_ON(!__same_type(ops->map_update_elem,
2002
(long (*)(struct bpf_map *map, void *key, void *value,
2003
u64 flags))NULL));
2004
BUILD_BUG_ON(!__same_type(ops->map_push_elem,
2005
(long (*)(struct bpf_map *map, void *value,
2006
u64 flags))NULL));
2007
BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
2008
(long (*)(struct bpf_map *map, void *value))NULL));
2009
BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
2010
(long (*)(struct bpf_map *map, void *value))NULL));
2011
BUILD_BUG_ON(!__same_type(ops->map_redirect,
2012
(long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
2013
BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
2014
(long (*)(struct bpf_map *map,
2015
bpf_callback_t callback_fn,
2016
void *callback_ctx,
2017
u64 flags))NULL));
2018
BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
2019
(void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
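/* The BUILD_BUG_ON()s above pin the prototypes of these ops callbacks to
 * the shapes the BPF calling convention expects, so the generic path
 * below can rewrite the helper call into a direct call via BPF_CALL_IMM()
 * without an argument-marshalling shim.
 */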
2020
2021
patch_map_ops_generic:
2022
switch (insn->imm) {
2023
case BPF_FUNC_map_lookup_elem:
2024
insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
2025
goto next_insn;
2026
case BPF_FUNC_map_update_elem:
2027
insn->imm = BPF_CALL_IMM(ops->map_update_elem);
2028
goto next_insn;
2029
case BPF_FUNC_map_delete_elem:
2030
insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
2031
goto next_insn;
2032
case BPF_FUNC_map_push_elem:
2033
insn->imm = BPF_CALL_IMM(ops->map_push_elem);
2034
goto next_insn;
2035
case BPF_FUNC_map_pop_elem:
2036
insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
2037
goto next_insn;
2038
case BPF_FUNC_map_peek_elem:
2039
insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
2040
goto next_insn;
2041
case BPF_FUNC_redirect_map:
2042
insn->imm = BPF_CALL_IMM(ops->map_redirect);
2043
goto next_insn;
2044
case BPF_FUNC_for_each_map_elem:
2045
insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
2046
goto next_insn;
2047
case BPF_FUNC_map_lookup_percpu_elem:
2048
insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
2049
goto next_insn;
2050
}
2051
2052
goto patch_call_imm;
2053
}
2054
2055
/* Implement bpf_jiffies64 inline. */
2056
if (prog->jit_requested && BITS_PER_LONG == 64 &&
2057
insn->imm == BPF_FUNC_jiffies64) {
2058
struct bpf_insn ld_jiffies_addr[2] = {
2059
BPF_LD_IMM64(BPF_REG_0,
2060
(unsigned long)&jiffies),
2061
};
2062
2063
insn_buf[0] = ld_jiffies_addr[0];
2064
insn_buf[1] = ld_jiffies_addr[1];
2065
insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
2066
BPF_REG_0, 0);
2067
cnt = 3;
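/* i.e. r0 = *(u64 *)&jiffies. On 64-bit kernels jiffies aliases the
 * 64-bit jiffies_64 counter, so one BPF_DW load replaces the helper call.
 */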
2068
2069
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
2070
cnt);
2071
if (!new_prog)
2072
return -ENOMEM;
2073
2074
delta += cnt - 1;
2075
env->prog = prog = new_prog;
2076
insn = new_prog->insnsi + i + delta;
2077
goto next_insn;
2078
}
2079
2080
#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
2081
/* Implement bpf_get_smp_processor_id() inline. */
2082
if (insn->imm == BPF_FUNC_get_smp_processor_id &&
2083
bpf_verifier_inlines_helper_call(env, insn->imm)) {
2084
/* BPF_FUNC_get_smp_processor_id inlining is an
2085
* optimization, so if cpu_number is ever
2086
* changed in some incompatible and hard to support
2087
* way, it's fine to back out this inlining logic
2088
*/
2089
#ifdef CONFIG_SMP
2090
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
2091
insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
2092
insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
2093
cnt = 3;
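/* BPF_MOV64_PERCPU_REG() is the per-CPU address conversion insn: the JIT
 * turns the per-CPU symbol offset loaded above into the address of this
 * CPU's copy, so the following 32-bit load reads the current CPU's
 * cpu_number without a helper call.
 */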
2094
#else
2095
insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
2096
cnt = 1;
2097
#endif
2098
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2099
if (!new_prog)
2100
return -ENOMEM;
2101
2102
delta += cnt - 1;
2103
env->prog = prog = new_prog;
2104
insn = new_prog->insnsi + i + delta;
2105
goto next_insn;
2106
}
2107
2108
/* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
2109
if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
2110
bpf_verifier_inlines_helper_call(env, insn->imm)) {
2111
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&current_task);
2112
insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
2113
insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
2114
cnt = 3;
2115
2116
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2117
if (!new_prog)
2118
return -ENOMEM;
2119
2120
delta += cnt - 1;
2121
env->prog = prog = new_prog;
2122
insn = new_prog->insnsi + i + delta;
2123
goto next_insn;
2124
}
2125
#endif
2126
/* Implement bpf_get_func_arg inline. */
2127
if (prog_type == BPF_PROG_TYPE_TRACING &&
2128
insn->imm == BPF_FUNC_get_func_arg) {
2129
if (eatype == BPF_TRACE_RAW_TP) {
2130
int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
2131
2132
/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
2133
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
2134
cnt = 1;
2135
} else {
2136
/* Load nr_args from ctx - 8 */
2137
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2138
insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2139
cnt = 2;
2140
}
2141
insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
2142
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
2143
insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
2144
insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
2145
insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
2146
insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
2147
insn_buf[cnt++] = BPF_JMP_A(1);
2148
insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
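/* The emitted sequence behaves roughly like:
 *   if (n >= nr_args)
 *           return -EINVAL;
 *   *(u64 *)value = ((u64 *)ctx)[n];
 *   return 0;
 * where n is the argument index in R2 and value is the pointer in R3.
 */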
2149
2150
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2151
if (!new_prog)
2152
return -ENOMEM;
2153
2154
delta += cnt - 1;
2155
env->prog = prog = new_prog;
2156
insn = new_prog->insnsi + i + delta;
2157
goto next_insn;
2158
}
2159
2160
/* Implement bpf_get_func_ret inline. */
2161
if (prog_type == BPF_PROG_TYPE_TRACING &&
2162
insn->imm == BPF_FUNC_get_func_ret) {
2163
if (eatype == BPF_TRACE_FEXIT ||
2164
eatype == BPF_TRACE_FSESSION ||
2165
eatype == BPF_MODIFY_RETURN) {
2166
/* Load nr_args from ctx - 8 */
2167
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2168
insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2169
insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
2170
insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
2171
insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
2172
insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
2173
insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
2174
cnt = 7;
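/* For fexit/fsession/fmod_ret the trampoline stores the traced function's
 * return value in the ctx slot right after the arguments, so the sequence
 * above is roughly:
 *   *(u64 *)value = ((u64 *)ctx)[nr_args];
 *   return 0;
 * with value being the pointer passed in R2.
 */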
2175
} else {
2176
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
2177
cnt = 1;
2178
}
2179
2180
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2181
if (!new_prog)
2182
return -ENOMEM;
2183
2184
delta += cnt - 1;
2185
env->prog = prog = new_prog;
2186
insn = new_prog->insnsi + i + delta;
2187
goto next_insn;
2188
}
2189
2190
/* Implement get_func_arg_cnt inline. */
2191
if (prog_type == BPF_PROG_TYPE_TRACING &&
2192
insn->imm == BPF_FUNC_get_func_arg_cnt) {
2193
if (eatype == BPF_TRACE_RAW_TP) {
2194
int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
2195
2196
/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
2197
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
2198
cnt = 1;
2199
} else {
2200
/* Load nr_args from ctx - 8 */
2201
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2202
insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2203
cnt = 2;
2204
}
2205
2206
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2207
if (!new_prog)
2208
return -ENOMEM;
2209
2210
delta += cnt - 1;
2211
env->prog = prog = new_prog;
2212
insn = new_prog->insnsi + i + delta;
2213
goto next_insn;
2214
}
2215
2216
/* Implement bpf_get_func_ip inline. */
2217
if (prog_type == BPF_PROG_TYPE_TRACING &&
2218
insn->imm == BPF_FUNC_get_func_ip) {
2219
/* Load IP address from ctx - 16 */
2220
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
2221
2222
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
2223
if (!new_prog)
2224
return -ENOMEM;
2225
2226
env->prog = prog = new_prog;
2227
insn = new_prog->insnsi + i + delta;
2228
goto next_insn;
2229
}
2230
2231
/* Implement bpf_get_branch_snapshot inline. */
2232
if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
2233
prog->jit_requested && BITS_PER_LONG == 64 &&
2234
insn->imm == BPF_FUNC_get_branch_snapshot) {
2235
/* We are dealing with the following func protos:
2236
* u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
2237
* int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
2238
*/
2239
const u32 br_entry_size = sizeof(struct perf_branch_entry);
2240
2241
/* struct perf_branch_entry is part of UAPI and is
2242
* used as an array element, so extremely unlikely to
2243
* ever grow or shrink
2244
*/
2245
BUILD_BUG_ON(br_entry_size != 24);
2246
2247
/* if (unlikely(flags)) return -EINVAL */
2248
insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
2249
2250
/* Transform size (bytes) into number of entries (cnt = size / 24).
2251
* But to avoid expensive division instruction, we implement
2252
* divide-by-3 through multiplication, followed by further
2253
* division by 8 through 3-bit right shift.
2254
* Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
2255
* p. 227, chapter "Unsigned Division by 3" for details and proofs.
2256
*
2257
* N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
2258
*/
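/* Worked example: size = 72 (three 24-byte entries) gives
 * 72 * 0xaaaaaaab = 206158430232, and 206158430232 >> 36 = 3.
 * The multiply is BPF_ALU64 on purpose: the full 64-bit product is
 * needed before the 36-bit shift.
 */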
2259
insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
2260
insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
2261
insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
2262
2263
/* call perf_snapshot_branch_stack implementation */
2264
insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
2265
/* if (entry_cnt == 0) return -ENOENT */
2266
insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
2267
/* return entry_cnt * sizeof(struct perf_branch_entry) */
2268
insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
2269
insn_buf[7] = BPF_JMP_A(3);
2270
/* return -EINVAL; */
2271
insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
2272
insn_buf[9] = BPF_JMP_A(1);
2273
/* return -ENOENT; */
2274
insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
2275
cnt = 11;
2276
2277
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2278
if (!new_prog)
2279
return -ENOMEM;
2280
2281
delta += cnt - 1;
2282
env->prog = prog = new_prog;
2283
insn = new_prog->insnsi + i + delta;
2284
goto next_insn;
2285
}
2286
2287
/* Implement bpf_kptr_xchg inline */
2288
if (prog->jit_requested && BITS_PER_LONG == 64 &&
2289
insn->imm == BPF_FUNC_kptr_xchg &&
2290
bpf_jit_supports_ptr_xchg()) {
2291
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
2292
insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
2293
cnt = 2;
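/* i.e. r0 = r2; r0 = xchg(r1 + 0, r0). The verifier has already checked
 * that R1 points at the kptr field itself and that the exchanged pointer
 * types match, so the helper call collapses into a single atomic XCHG.
 */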
2294
2295
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2296
if (!new_prog)
2297
return -ENOMEM;
2298
2299
delta += cnt - 1;
2300
env->prog = prog = new_prog;
2301
insn = new_prog->insnsi + i + delta;
2302
goto next_insn;
2303
}
2304
patch_call_imm:
2305
fn = env->ops->get_func_proto(insn->imm, env->prog);
2306
/* all functions that have a prototype and that the verifier allowed
2307
* programs to call must be real in-kernel functions
2308
*/
2309
if (!fn->func) {
2310
verifier_bug(env,
2311
"not inlined functions %s#%d is missing func",
2312
func_id_name(insn->imm), insn->imm);
2313
return -EFAULT;
2314
}
2315
insn->imm = fn->func - __bpf_call_base;
2316
next_insn:
2317
if (subprogs[cur_subprog + 1].start == i + delta + 1) {
2318
subprogs[cur_subprog].stack_depth += stack_depth_extra;
2319
subprogs[cur_subprog].stack_extra = stack_depth_extra;
2320
2321
stack_depth = subprogs[cur_subprog].stack_depth;
2322
if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
2323
verbose(env, "stack size %d(extra %d) is too large\n",
2324
stack_depth, stack_depth_extra);
2325
return -EINVAL;
2326
}
2327
cur_subprog++;
2328
stack_depth = subprogs[cur_subprog].stack_depth;
2329
stack_depth_extra = 0;
2330
}
2331
i++;
2332
insn++;
2333
}
2334
2335
env->prog->aux->stack_depth = subprogs[0].stack_depth;
2336
for (i = 0; i < env->subprog_cnt; i++) {
2337
int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
2338
int subprog_start = subprogs[i].start;
2339
int stack_slots = subprogs[i].stack_extra / 8;
2340
int slots = delta, cnt = 0;
2341
2342
if (!stack_slots)
2343
continue;
2344
/* We need two slots in case timed may_goto is supported. */
2345
if (stack_slots > slots) {
2346
verifier_bug(env, "stack_slots supports may_goto only");
2347
return -EFAULT;
2348
}
2349
2350
stack_depth = subprogs[i].stack_depth;
2351
if (bpf_jit_supports_timed_may_goto()) {
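/* Timed may_goto uses both slots: the count at fp - stack_depth starts
 * at BPF_MAX_TIMED_LOOPS, and the slot above it holds the loop start
 * timestamp, zeroed here so arch_bpf_timed_may_goto() can record the
 * start time on its first invocation.
 */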
2352
insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
2353
BPF_MAX_TIMED_LOOPS);
2354
insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
2355
} else {
2356
/* Add ST insn to subprog prologue to init extra stack */
2357
insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
2358
BPF_MAX_LOOPS);
2359
}
2360
/* Copy first actual insn to preserve it */
2361
insn_buf[cnt++] = env->prog->insnsi[subprog_start];
2362
2363
new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
2364
if (!new_prog)
2365
return -ENOMEM;
2366
env->prog = prog = new_prog;
2367
/*
2368
* If may_goto is a first insn of a prog there could be a jmp
2369
* insn that points to it, hence adjust all such jmps to point
2370
* to insn after BPF_ST that inits may_goto count.
2371
* Adjustment will succeed because bpf_patch_insn_data() didn't fail.
2372
*/
2373
WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
2374
}
2375
2376
/* Since poke tab is now finalized, publish aux to tracker. */
2377
for (i = 0; i < prog->aux->size_poke_tab; i++) {
2378
map_ptr = prog->aux->poke_tab[i].tail_call.map;
2379
if (!map_ptr->ops->map_poke_track ||
2380
!map_ptr->ops->map_poke_untrack ||
2381
!map_ptr->ops->map_poke_run) {
2382
verifier_bug(env, "poke tab is misconfigured");
2383
return -EFAULT;
2384
}
2385
2386
ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
2387
if (ret < 0) {
2388
verbose(env, "tracking tail call prog failed\n");
2389
return ret;
2390
}
2391
}
2392
2393
ret = sort_kfunc_descs_by_imm_off(env);
2394
if (ret)
2395
return ret;
2396
2397
return 0;
2398
}
2399
2400
static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
2401
int position,
2402
s32 stack_base,
2403
u32 callback_subprogno,
2404
u32 *total_cnt)
2405
{
2406
s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
2407
s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
2408
s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
2409
int reg_loop_max = BPF_REG_6;
2410
int reg_loop_cnt = BPF_REG_7;
2411
int reg_loop_ctx = BPF_REG_8;
2412
2413
struct bpf_insn *insn_buf = env->insn_buf;
2414
struct bpf_prog *new_prog;
2415
u32 callback_start;
2416
u32 call_insn_offset;
2417
s32 callback_offset;
2418
u32 cnt = 0;
2419
2420
/* This represents an inlined version of bpf_iter.c:bpf_loop,
2421
* be careful to modify this code in sync.
2422
*/
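/* Roughly equivalent C for the patch built below:
 *
 *   if (nr_loops > BPF_MAX_LOOPS)
 *           return -E2BIG;
 *   for (i = 0; i < nr_loops; ) {
 *           ret = callback_fn(i, callback_ctx);
 *           i++;
 *           if (ret)
 *                   break;
 *   }
 *   return i;
 */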
2423
2424
/* Return error and jump to the end of the patch if
2425
* expected number of iterations is too big.
2426
*/
2427
insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
2428
insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
2429
insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
2430
/* spill R6, R7, R8 to use these as loop vars */
2431
insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
2432
insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
2433
insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
2434
/* initialize loop vars */
2435
insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
2436
insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
2437
insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
2438
/* loop header,
2439
* if reg_loop_cnt >= reg_loop_max skip the loop body
2440
*/
2441
insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
2442
/* callback call,
2443
* correct callback offset would be set after patching
2444
*/
2445
insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
2446
insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
2447
insn_buf[cnt++] = BPF_CALL_REL(0);
2448
/* increment loop counter */
2449
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
2450
/* jump to loop header if callback returned 0 */
2451
insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
2452
/* return value of bpf_loop,
2453
* set R0 to the number of iterations
2454
*/
2455
insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
2456
/* restore original values of R6, R7, R8 */
2457
insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
2458
insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
2459
insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
2460
2461
*total_cnt = cnt;
2462
new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
2463
if (!new_prog)
2464
return new_prog;
2465
2466
/* callback start is known only after patching */
2467
callback_start = env->subprog_info[callback_subprogno].start;
2468
/* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
2469
call_insn_offset = position + 12;
2470
callback_offset = callback_start - call_insn_offset - 1;
2471
new_prog->insnsi[call_insn_offset].imm = callback_offset;
2472
2473
return new_prog;
2474
}
2475
2476
static bool is_bpf_loop_call(struct bpf_insn *insn)
2477
{
2478
return insn->code == (BPF_JMP | BPF_CALL) &&
2479
insn->src_reg == 0 &&
2480
insn->imm == BPF_FUNC_loop;
2481
}
2482
2483
/* For all sub-programs in the program (including main) check
2484
* insn_aux_data to see if there are bpf_loop calls that require
2485
* inlining. If such calls are found, they are replaced with the
2486
* instruction sequence produced by the `inline_bpf_loop` function, and the
2487
* subprog stack_depth is increased by the size of 3 registers.
2488
* This stack space is used to spill the values of R6, R7 and R8. These
2489
* registers are used to store the loop bound, counter and context
2490
* variables.
2491
*/
2492
int bpf_optimize_bpf_loop(struct bpf_verifier_env *env)
2493
{
2494
struct bpf_subprog_info *subprogs = env->subprog_info;
2495
int i, cur_subprog = 0, cnt, delta = 0;
2496
struct bpf_insn *insn = env->prog->insnsi;
2497
int insn_cnt = env->prog->len;
2498
u16 stack_depth = subprogs[cur_subprog].stack_depth;
2499
u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
2500
u16 stack_depth_extra = 0;
2501
2502
for (i = 0; i < insn_cnt; i++, insn++) {
2503
struct bpf_loop_inline_state *inline_state =
2504
&env->insn_aux_data[i + delta].loop_inline_state;
2505
2506
if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
2507
struct bpf_prog *new_prog;
2508
2509
stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
2510
new_prog = inline_bpf_loop(env,
2511
i + delta,
2512
-(stack_depth + stack_depth_extra),
2513
inline_state->callback_subprogno,
2514
&cnt);
2515
if (!new_prog)
2516
return -ENOMEM;
2517
2518
delta += cnt - 1;
2519
env->prog = new_prog;
2520
insn = new_prog->insnsi + i + delta;
2521
}
2522
2523
if (subprogs[cur_subprog + 1].start == i + delta + 1) {
2524
subprogs[cur_subprog].stack_depth += stack_depth_extra;
2525
cur_subprog++;
2526
stack_depth = subprogs[cur_subprog].stack_depth;
2527
stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
2528
stack_depth_extra = 0;
2529
}
2530
}
2531
2532
env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
2533
2534
return 0;
2535
}
2536
2537
/* Remove unnecessary spill/fill pairs that are members of the fastcall
2538
* pattern, and adjust subprogram stack depth when possible.
2539
*/
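/* The pattern in question looks like:
 *
 *   *(u64 *)(r10 - X) = rN;    spill a caller-saved reg before the call
 *   call <helper/kfunc that does not clobber rN>
 *   rN = *(u64 *)(r10 - X);    fill it back afterwards
 *
 * When the verifier has proven the callee preserves rN, both the spill
 * and the fill are turned into NOPs here and the stack slot is reclaimed
 * by lowering the subprog's stack depth.
 */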
2540
int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env)
2541
{
2542
struct bpf_subprog_info *subprog = env->subprog_info;
2543
struct bpf_insn_aux_data *aux = env->insn_aux_data;
2544
struct bpf_insn *insn = env->prog->insnsi;
2545
int insn_cnt = env->prog->len;
2546
u32 spills_num;
2547
bool modified = false;
2548
int i, j;
2549
2550
for (i = 0; i < insn_cnt; i++, insn++) {
2551
if (aux[i].fastcall_spills_num > 0) {
2552
spills_num = aux[i].fastcall_spills_num;
2553
/* NOPs would be removed by opt_remove_nops() */
2554
for (j = 1; j <= spills_num; ++j) {
2555
*(insn - j) = NOP;
2556
*(insn + j) = NOP;
2557
}
2558
modified = true;
2559
}
2560
if ((subprog + 1)->start == i + 1) {
2561
if (modified && !subprog->keep_fastcall_stack)
2562
subprog->stack_depth = -subprog->fastcall_stack_off;
2563
subprog++;
2564
modified = false;
2565
}
2566
}
2567
2568
return 0;
2569
}
2570
2571
2572