// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <linux/bitmap.h>

#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)

/* for any branch, call, exit record the history of jmps in the given state */
int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
			 int insn_flags, u64 linked_regs)
{
	u32 cnt = cur->jmp_history_cnt;
	struct bpf_jmp_history_entry *p;
	size_t alloc_size;

	/* combine instruction flags if we already recorded this instruction */
	if (env->cur_hist_ent) {
		/* atomic instructions push insn_flags twice, for READ and
		 * WRITE sides, but they should agree on stack slot
		 */
		verifier_bug_if((env->cur_hist_ent->flags & insn_flags) &&
				(env->cur_hist_ent->flags & insn_flags) != insn_flags,
				env, "insn history: insn_idx %d cur flags %x new flags %x",
				env->insn_idx, env->cur_hist_ent->flags, insn_flags);
		env->cur_hist_ent->flags |= insn_flags;
		verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env,
				"insn history: insn_idx %d linked_regs: %#llx",
				env->insn_idx, env->cur_hist_ent->linked_regs);
		env->cur_hist_ent->linked_regs = linked_regs;
		return 0;
	}

	cnt++;
	alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
	p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT);
	if (!p)
		return -ENOMEM;
	cur->jmp_history = p;

	p = &cur->jmp_history[cnt - 1];
	p->idx = env->insn_idx;
	p->prev_idx = env->prev_insn_idx;
	p->flags = insn_flags;
	p->linked_regs = linked_regs;
	cur->jmp_history_cnt = cnt;
	env->cur_hist_ent = p;

	return 0;
}
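
/* A minimal sketch of what ends up in cur->jmp_history (illustrative
 * values, not from a real verifier run). For a path that takes the branch in
 *
 *	3: if r1 > 7 goto +2
 *	6: r0 = 0
 *	7: exit
 *
 * the jump 3 -> 6 is recorded as a single entry { .idx = 6, .prev_idx = 3 },
 * which later lets the backtracker step from insn 6 back to insn 3 instead
 * of to insn 5. Straight-line transitions need no entry; get_prev_insn_idx()
 * below simply decrements the instruction index.
 */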

static bool is_atomic_load_insn(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_STX &&
	       BPF_MODE(insn->code) == BPF_ATOMIC &&
	       insn->imm == BPF_LOAD_ACQ;
}

static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_STX &&
	       BPF_MODE(insn->code) == BPF_ATOMIC &&
	       (insn->imm & BPF_FETCH);
}

static int insn_stack_access_spi(int insn_flags)
{
	return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
}

static int insn_stack_access_frameno(int insn_flags)
{
	return insn_flags & INSN_F_FRAMENO_MASK;
}
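
/* Illustrative layout of the stack-access bits in insn_flags, assuming the
 * INSN_F_* definitions from bpf_verifier.h (frame number in the low bits,
 * spill slot index above it, INSN_F_STACK_ACCESS above those):
 *
 *	insn_flags = INSN_F_STACK_ACCESS |
 *		     (spi << INSN_F_SPI_SHIFT) | frameno;
 *
 * which is exactly the encoding that the two accessors above undo.
 */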

/* Backtrack one insn at a time. If idx is not at the top of recorded
 * history then previous instruction came from straight line execution.
 * Return -ENOENT if we exhausted all instructions within given state.
 *
 * It's legal to have a bit of a looping with the same starting and ending
 * insn index within the same state, e.g.: 3->4->5->3, so just because current
 * instruction index is the same as state's first_idx doesn't mean we are
 * done. If there is still some jump history left, we should keep going. We
 * need to take into account that we might have a jump history between given
 * state's parent and itself, due to checkpointing. In this case, we'll have
 * history entry recording a jump from last instruction of parent state and
 * first instruction of given state.
 */
static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
			     u32 *history)
{
	u32 cnt = *history;

	if (i == st->first_insn_idx) {
		if (cnt == 0)
			return -ENOENT;
		if (cnt == 1 && st->jmp_history[0].idx == i)
			return -ENOENT;
	}

	if (cnt && st->jmp_history[cnt - 1].idx == i) {
		i = st->jmp_history[cnt - 1].prev_idx;
		(*history)--;
	} else {
		i--;
	}
	return i;
}
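
/* Worked example of the looping case described above (illustrative values):
 * let first_insn_idx == 3, with jmp_history[0] = { .idx = 3, .prev_idx = 1 }
 * recording the jump from the parent state and jmp_history[1] =
 * { .idx = 3, .prev_idx = 5 } recording the back-edge of 3->4->5->3, so
 * *history == 2 at the start:
 *
 *	i == 3: cnt == 2, so the first_insn_idx check doesn't stop us;
 *		jmp_history[1].idx == 3, follow the back-edge -> i = 5
 *	i == 5: no matching entry, straight line -> i = 4
 *	i == 4: straight line -> i = 3
 *	i == 3: cnt == 1 && jmp_history[0].idx == 3 -> -ENOENT, this state
 *		is exhausted and the caller continues in st->parent.
 */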

static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
							u32 hist_end, int insn_idx)
{
	if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
		return &st->jmp_history[hist_end - 1];
	return NULL;
}

static inline void bt_init(struct backtrack_state *bt, u32 frame)
{
	bt->frame = frame;
}

static inline void bt_reset(struct backtrack_state *bt)
{
	struct bpf_verifier_env *env = bt->env;

	memset(bt, 0, sizeof(*bt));
	bt->env = env;
}

static inline bool bt_empty(struct backtrack_state *bt)
{
	u64 mask = 0;
	int i;

	for (i = 0; i <= bt->frame; i++)
		mask |= bt->reg_masks[i] | bt->stack_masks[i];

	return mask == 0;
}

static inline int bt_subprog_enter(struct backtrack_state *bt)
{
	if (bt->frame == MAX_CALL_FRAMES - 1) {
		verifier_bug(bt->env, "subprog enter from frame %d", bt->frame);
		return -EFAULT;
	}
	bt->frame++;
	return 0;
}

static inline int bt_subprog_exit(struct backtrack_state *bt)
{
	if (bt->frame == 0) {
		verifier_bug(bt->env, "subprog exit from frame 0");
		return -EFAULT;
	}
	bt->frame--;
	return 0;
}
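
/* Note that frame transitions are reversed while backtracking: walking the
 * instructions backwards, a BPF_EXIT moves us *into* the callee frame, so
 * backtrack_insn() calls bt_subprog_enter() there, while reaching the
 * corresponding call insn moves us back out via bt_subprog_exit().
 */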

static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
{
	bt->reg_masks[frame] &= ~(1 << reg);
}

static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
{
	bpf_bt_set_frame_reg(bt, bt->frame, reg);
}

static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
{
	bt_clear_frame_reg(bt, bt->frame, reg);
}

static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
{
	bt->stack_masks[frame] &= ~(1ull << slot);
}

static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
{
	return bt->reg_masks[frame];
}

static inline u32 bt_reg_mask(struct backtrack_state *bt)
{
	return bt->reg_masks[bt->frame];
}

static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
{
	return bt->stack_masks[frame];
}

static inline u64 bt_stack_mask(struct backtrack_state *bt)
{
	return bt->stack_masks[bt->frame];
}

static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
{
	return bt->reg_masks[bt->frame] & (1 << reg);
}

/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
{
	DECLARE_BITMAP(mask, 64);
	bool first = true;
	int i, n;

	buf[0] = '\0';

	bitmap_from_u64(mask, reg_mask);
	for_each_set_bit(i, mask, 32) {
		n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
		first = false;
		buf += n;
		buf_sz -= n;
		if (buf_sz < 0)
			break;
	}
}

/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
{
	DECLARE_BITMAP(mask, 64);
	bool first = true;
	int i, n;

	buf[0] = '\0';

	bitmap_from_u64(mask, stack_mask);
	for_each_set_bit(i, mask, 64) {
		n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
		first = false;
		buf += n;
		buf_sz -= n;
		if (buf_sz < 0)
			break;
	}
}
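
/* For stack masks, bit i stands for the slot at fp - (i + 1) * 8, so, as an
 * illustration, mask 0x5 (bits 0 and 2 set) formats as "-8,-24". Both
 * helpers are typically used with env->tmp_str_buf as the destination.
 */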

/* For given verifier state backtrack_insn() is called from the last insn to
 * the first insn. Its purpose is to compute a bitmask of registers and
 * stack slots that need precision in the parent verifier state.
 *
 * @idx is an index of the instruction we are currently processing;
 * @subseq_idx is an index of the subsequent instruction that:
 *   - *would be* executed next, if jump history is viewed in forward order;
 *   - *was* processed previously during backtracking.
 */
static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
			  struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
{
	struct bpf_insn *insn = env->prog->insnsi + idx;
	u8 class = BPF_CLASS(insn->code);
	u8 opcode = BPF_OP(insn->code);
	u8 mode = BPF_MODE(insn->code);
	u32 dreg = insn->dst_reg;
	u32 sreg = insn->src_reg;
	u32 spi, i, fr;

	if (insn->code == 0)
		return 0;
	if (env->log.level & BPF_LOG_LEVEL2) {
		fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
		verbose(env, "mark_precise: frame%d: regs=%s ",
			bt->frame, env->tmp_str_buf);
		bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
		verbose(env, "stack=%s before ", env->tmp_str_buf);
		verbose(env, "%d: ", idx);
		bpf_verbose_insn(env, insn);
	}

	/* If there is a history record that some registers gained range at this insn,
	 * propagate precision marks to those registers, so that bt_is_reg_set()
	 * accounts for these registers.
	 */
	bpf_bt_sync_linked_regs(bt, hist);

	if (class == BPF_ALU || class == BPF_ALU64) {
		if (!bt_is_reg_set(bt, dreg))
			return 0;
		if (opcode == BPF_END || opcode == BPF_NEG) {
			/* sreg is reserved and unused
			 * dreg still needs precision before this insn
			 */
			return 0;
		} else if (opcode == BPF_MOV) {
			if (BPF_SRC(insn->code) == BPF_X) {
				/* dreg = sreg or dreg = (s8, s16, s32)sreg
				 * dreg needs precision after this insn
				 * sreg needs precision before this insn
				 */
				bt_clear_reg(bt, dreg);
				if (sreg != BPF_REG_FP)
					bt_set_reg(bt, sreg);
			} else {
				/* dreg = K
				 * dreg needs precision after this insn.
				 * Corresponding register is already marked
				 * as precise=true in this verifier state.
				 * No further markings in parent are necessary
				 */
				bt_clear_reg(bt, dreg);
			}
		} else {
			if (BPF_SRC(insn->code) == BPF_X) {
				/* dreg += sreg
				 * both dreg and sreg need precision
				 * before this insn
				 */
				if (sreg != BPF_REG_FP)
					bt_set_reg(bt, sreg);
			} /* else dreg += K
			   * dreg still needs precision before this insn
			   */
		}
	} else if (class == BPF_LDX ||
		   is_atomic_load_insn(insn) ||
		   is_atomic_fetch_insn(insn)) {
		u32 load_reg = dreg;

		/*
		 * Atomic fetch operation writes the old value into
		 * a register (sreg or r0) and if it was tracked for
		 * precision, propagate to the stack slot like we do
		 * in regular ldx.
		 */
		if (is_atomic_fetch_insn(insn))
			load_reg = insn->imm == BPF_CMPXCHG ?
				   BPF_REG_0 : sreg;

		if (!bt_is_reg_set(bt, load_reg))
			return 0;
		bt_clear_reg(bt, load_reg);

		/* scalars can only be spilled into stack w/o losing precision.
		 * Load from any other memory can be zero extended.
		 * The desire to keep that precision is already indicated
		 * by 'precise' mark in corresponding register of this state.
		 * No further tracking necessary.
		 */
		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
			return 0;
		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
		 * that [fp - off] slot contains scalar that needs to be
		 * tracked with precision
		 */
		spi = insn_stack_access_spi(hist->flags);
		fr = insn_stack_access_frameno(hist->flags);
		bpf_bt_set_frame_slot(bt, fr, spi);
	} else if (class == BPF_STX || class == BPF_ST) {
		if (bt_is_reg_set(bt, dreg))
			/* stx & st shouldn't be using _scalar_ dst_reg
			 * to access memory. It means backtracking
			 * encountered a case of pointer subtraction.
			 */
			return -ENOTSUPP;
		/* scalars can only be spilled into stack */
		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
			return 0;
		spi = insn_stack_access_spi(hist->flags);
		fr = insn_stack_access_frameno(hist->flags);
		if (!bt_is_frame_slot_set(bt, fr, spi))
			return 0;
		bt_clear_frame_slot(bt, fr, spi);
		if (class == BPF_STX)
			bt_set_reg(bt, sreg);
	} else if (class == BPF_JMP || class == BPF_JMP32) {
		if (bpf_pseudo_call(insn)) {
			int subprog_insn_idx, subprog;

			subprog_insn_idx = idx + insn->imm + 1;
			subprog = bpf_find_subprog(env, subprog_insn_idx);
			if (subprog < 0)
				return -EFAULT;

			if (bpf_subprog_is_global(env, subprog)) {
				/* check that jump history doesn't have any
				 * extra instructions from subprog; the next
				 * instruction after call to global subprog
				 * should be literally next instruction in
				 * caller program
				 */
				verifier_bug_if(idx + 1 != subseq_idx, env,
						"extra insn from subprog");
				/* r1-r5 are invalidated after subprog call,
				 * so for global func call it shouldn't be set
				 * anymore
				 */
				if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
					verifier_bug(env, "global subprog unexpected regs %x",
						     bt_reg_mask(bt));
					return -EFAULT;
				}
				/* global subprog always sets R0 */
				bt_clear_reg(bt, BPF_REG_0);
				return 0;
			} else {
				/* static subprog call instruction, which
				 * means that we are exiting current subprog,
				 * so only r1-r5 could be still requested as
				 * precise, r0 and r6-r10 or any stack slot in
				 * the current frame should be zero by now
				 */
				if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
					verifier_bug(env, "static subprog unexpected regs %x",
						     bt_reg_mask(bt));
					return -EFAULT;
				}
				/* we are now tracking register spills correctly,
				 * so any instance of leftover slots is a bug
				 */
				if (bt_stack_mask(bt) != 0) {
					verifier_bug(env,
						     "static subprog leftover stack slots %llx",
						     bt_stack_mask(bt));
					return -EFAULT;
				}
				/* propagate r1-r5 to the caller */
				for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
					if (bt_is_reg_set(bt, i)) {
						bt_clear_reg(bt, i);
						bpf_bt_set_frame_reg(bt, bt->frame - 1, i);
					}
				}
				if (bt_subprog_exit(bt))
					return -EFAULT;
				return 0;
			}
		} else if (bpf_is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
			/* exit from callback subprog to callback-calling helper or
			 * kfunc call. Use idx/subseq_idx check to discern it from
			 * straight line code backtracking.
			 * Unlike the subprog call handling above, we shouldn't
			 * propagate precision of r1-r5 (if any requested), as they are
			 * not actually arguments passed directly to callback subprogs
			 */
			if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
				verifier_bug(env, "callback unexpected regs %x",
					     bt_reg_mask(bt));
				return -EFAULT;
			}
			if (bt_stack_mask(bt) != 0) {
				verifier_bug(env, "callback leftover stack slots %llx",
					     bt_stack_mask(bt));
				return -EFAULT;
			}
			/* clear r1-r5 in callback subprog's mask */
			for (i = BPF_REG_1; i <= BPF_REG_5; i++)
				bt_clear_reg(bt, i);
			if (bt_subprog_exit(bt))
				return -EFAULT;
			return 0;
		} else if (opcode == BPF_CALL) {
			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
			 * catch this error later. Make backtracking conservative
			 * with ENOTSUPP.
			 */
			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
				return -ENOTSUPP;
			/* regular helper call sets R0 */
			bt_clear_reg(bt, BPF_REG_0);
			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
				/* if backtracking was looking for registers R1-R5
				 * they should have been found already.
				 */
				verifier_bug(env, "backtracking call unexpected regs %x",
					     bt_reg_mask(bt));
				return -EFAULT;
			}
			if (insn->src_reg == BPF_REG_0 && insn->imm == BPF_FUNC_tail_call &&
			    subseq_idx - idx != 1) {
				if (bt_subprog_enter(bt))
					return -EFAULT;
			}
		} else if (opcode == BPF_EXIT) {
			bool r0_precise;

			/* Backtracking to a nested function call, 'idx' is a part of
			 * the inner frame, 'subseq_idx' is a part of the outer frame.
			 * In case of a regular function call, instructions giving
			 * precision to registers R1-R5 should have been found already.
			 * In case of a callback, it is ok to have R1-R5 marked for
			 * backtracking, as these registers are set by the function
			 * invoking callback.
			 */
			if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx))
				for (i = BPF_REG_1; i <= BPF_REG_5; i++)
					bt_clear_reg(bt, i);
			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
				verifier_bug(env, "backtracking exit unexpected regs %x",
					     bt_reg_mask(bt));
				return -EFAULT;
			}

			/* BPF_EXIT in subprog or callback always returns
			 * right after the call instruction, so by checking
			 * whether the instruction at subseq_idx-1 is subprog
			 * call or not we can distinguish actual exit from
			 * *subprog* from exit from *callback*. In the former
			 * case, we need to propagate r0 precision, if
			 * necessary. In the latter case, we never do that.
			 */
			r0_precise = subseq_idx - 1 >= 0 &&
				     bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
				     bt_is_reg_set(bt, BPF_REG_0);

			bt_clear_reg(bt, BPF_REG_0);
			if (bt_subprog_enter(bt))
				return -EFAULT;

			if (r0_precise)
				bt_set_reg(bt, BPF_REG_0);
			/* r6-r9 and stack slots will stay set in caller frame
			 * bitmasks until we return back from callee(s)
			 */
			return 0;
		} else if (BPF_SRC(insn->code) == BPF_X) {
			if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
				return 0;
			/* dreg <cond> sreg
			 * Both dreg and sreg need precision before
			 * this insn. If only sreg was marked precise
			 * before it would be equally necessary to
			 * propagate it to dreg.
			 */
			if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK))
				bt_set_reg(bt, sreg);
			if (!hist || !(hist->flags & INSN_F_DST_REG_STACK))
				bt_set_reg(bt, dreg);
		} else if (BPF_SRC(insn->code) == BPF_K) {
			/* dreg <cond> K
			 * Only dreg still needs precision before
			 * this insn, so for the K-based conditional
			 * there is nothing new to be marked.
			 */
		}
	} else if (class == BPF_LD) {
		if (!bt_is_reg_set(bt, dreg))
			return 0;
		bt_clear_reg(bt, dreg);
		/* It's ld_imm64 or ld_abs or ld_ind.
		 * For ld_imm64 no further tracking of precision
		 * into parent is necessary
		 */
		if (mode == BPF_IND || mode == BPF_ABS)
			/* to be analyzed */
			return -ENOTSUPP;
	}
	/* Propagate precision marks to linked registers, to account for
	 * registers marked as precise in this function.
	 */
	bpf_bt_sync_linked_regs(bt, hist);
	return 0;
}
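
/* A short illustrative run of backtrack_insn() (made-up program, not from a
 * real verifier log): suppose precision of r4 is requested after
 *
 *	7: r4 = r6
 *	8: r4 += 1
 *
 * Backtracking visits insn 8 first: it is "dreg += K", so r4 simply stays in
 * the register mask. Insn 7 is "dreg = sreg": r4 is cleared and r6 is set,
 * so the search continues upwards looking for where r6 got its value.
 */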

/* the scalar precision tracking algorithm:
 * . at the start all registers have precise=false.
 * . scalar ranges are tracked as normal through alu and jmp insns.
 * . once precise value of the scalar register is used in:
 *   . ptr + scalar alu
 *   . if (scalar cond K|scalar)
 *   . helper_call(.., scalar, ...) where ARG_CONST is expected
 *   backtrack through the verifier states and mark all registers and
 *   stack slots with spilled constants that these scalar registers
 *   should be precise.
 * . during state pruning two registers (or spilled stack slots)
 *   are equivalent if both are not precise.
 *
 * Note the verifier cannot simply walk register parentage chain,
 * since many different registers and stack slots could have been
 * used to compute single precise scalar.
 *
 * The approach of starting with precise=true for all registers and then
 * backtrack to mark a register as not precise when the verifier detects
 * that program doesn't care about specific value (e.g., when helper
 * takes register as ARG_ANYTHING parameter) is not safe.
 *
 * It's ok to walk single parentage chain of the verifier states.
 * It's possible that this backtracking will go all the way till 1st insn.
 * All other branches will be explored for needing precision later.
 *
 * The backtracking needs to deal with cases like:
 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
 *   r9 -= r8
 *   r5 = r9
 *   if r5 > 0x79f goto pc+7
 *     R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
 *   r5 += 1
 *   ...
 *   call bpf_perf_event_output#25
 *     where .arg5_type = ARG_CONST_SIZE_OR_ZERO
 *
 * and this case:
 *   r6 = 1
 *   call foo // uses callee's r6 inside to compute r0
 *   r0 += r6
 *   if r0 == 0 goto
 *
 * to track above reg_mask/stack_mask needs to be independent for each frame.
 *
 * Also if parent's curframe > frame where backtracking started,
 * the verifier needs to mark registers in both frames, otherwise callees
 * may incorrectly prune callers. This is similar to
 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
 *
 * For now backtracking falls back into conservative marking.
 */
void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env,
				  struct bpf_verifier_state *st)
{
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	int i, j;

	if (env->log.level & BPF_LOG_LEVEL2) {
		verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
			st->curframe);
	}

	/* big hammer: mark all scalars precise in this path.
	 * pop_stack may still get !precise scalars.
	 * We also skip current state and go straight to first parent state,
	 * because precision markings in current non-checkpointed state are
	 * not needed. See why in the comment in bpf_mark_chain_precision below.
	 */
	for (st = st->parent; st; st = st->parent) {
		for (i = 0; i <= st->curframe; i++) {
			func = st->frame[i];
			for (j = 0; j < BPF_REG_FP; j++) {
				reg = &func->regs[j];
				if (reg->type != SCALAR_VALUE || reg->precise)
					continue;
				reg->precise = true;
				if (env->log.level & BPF_LOG_LEVEL2) {
					verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
						i, j);
				}
			}
			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
				if (!bpf_is_spilled_reg(&func->stack[j]))
					continue;
				reg = &func->stack[j].spilled_ptr;
				if (reg->type != SCALAR_VALUE || reg->precise)
					continue;
				reg->precise = true;
				if (env->log.level & BPF_LOG_LEVEL2) {
					verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
						i, -(j + 1) * 8);
				}
			}
		}
	}
}

/*
 * bpf_mark_chain_precision() backtracks BPF program instruction sequence and
 * chain of verifier states making sure that register *regno* (if regno >= 0)
 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
 * SCALARS, as well as any other registers and slots that contribute to
 * a tracked state of given registers/stack slots, depending on specific BPF
 * assembly instructions (see backtrack_insn() for exact instruction handling
 * logic). This backtracking relies on recorded jmp_history and is able to
 * traverse entire chain of parent states. This process ends only when all the
 * necessary registers/slots and their transitive dependencies are marked as
 * precise.
 *
 * One important and subtle aspect is that precise marks *do not matter* in
 * the currently verified state (current state). It is important to understand
 * why this is the case.
 *
 * First, note that current state is the state that is not yet "checkpointed",
 * i.e., it is not yet put into env->explored_states, and it has no children
 * states as well. It's ephemeral, and can end up either a) being discarded if
 * compatible explored state is found at some point or BPF_EXIT instruction is
 * reached or b) checkpointed and put into env->explored_states, branching out
 * into one or more children states.
 *
 * In the former case, precise markings in current state are completely
 * ignored by state comparison code (see regsafe() for details). Only
 * checkpointed ("old") state precise markings are important, and if old
 * state's register/slot is precise, regsafe() assumes current state's
 * register/slot as precise and checks value ranges exactly and precisely. If
 * states turn out to be compatible, current state's necessary precise
 * markings and any required parent states' precise markings are enforced
 * after the fact with propagate_precision() logic. But it's important to
 * realize that in this case, even after marking current state
 * registers/slots as precise, we immediately discard current state. So what
 * actually matters is any of the precise markings propagated into current
 * state's parent states, which are always checkpointed (due to b) case above).
 * As such, for scenario a) it doesn't matter if current state has precise
 * markings set or not.
 *
 * Now, for the scenario b), checkpointing and forking into child(ren)
 * state(s). Note that before current state gets to checkpointing step, any
 * processed instruction always assumes precise SCALAR register/slot
 * knowledge: if precise value or range is useful to prune jump branch, BPF
 * verifier takes this opportunity enthusiastically. Similarly, when
 * register's value is used to calculate offset or memory address, exact
 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
 * what we mentioned above about state comparison ignoring precise markings
 * during state comparison, BPF verifier ignores and also assumes precise
 * markings *at will* during instruction verification process. But as verifier
 * assumes precision, it also propagates any precision dependencies across
 * parent states, which are not yet finalized, so can be further restricted
 * based on new knowledge gained from restrictions enforced by their children
 * states. This is so that once those parent states are finalized, i.e., when
 * they have no more active children states, state comparison logic in
 * is_state_visited() would enforce strict and precise SCALAR ranges, if
 * required for correctness.
 *
 * To build a bit more intuition, note also that once a state is checkpointed,
 * the path we took to get to that state is not important. This is a crucial
 * property for state pruning. When state is checkpointed and finalized at
 * some instruction index, it can be correctly and safely used to "short
 * circuit" any *compatible* state that reaches exactly the same instruction
 * index. I.e., if we jumped to that instruction from a completely different
 * code path than original finalized state was derived from, it doesn't
 * matter, current state can be discarded because from that instruction
 * forward having a compatible state will ensure we will safely reach the
 * exit. States describe preconditions for further exploration, but completely
 * forget the history of how we got here.
 *
 * This also means that even if we needed precise SCALAR range to get to
 * finalized state, but from that point forward *that same* SCALAR register is
 * never used in a precise context (i.e., its precise value is not needed for
 * correctness), it's correct and safe to mark such register as "imprecise"
 * (i.e., precise marking set to false). This is what we rely on when we do
 * not set precise marking in current state. If no child state requires
 * precision for any given SCALAR register, it's safe to dictate that it can
 * be imprecise. If any child state does require this register to be precise,
 * we'll mark it precise later retroactively during precise markings
 * propagation from child state to parent states.
 *
 * Skipping precise marking setting in current state is a mild version of
 * relying on the above observation. But we can utilize this property even
 * more aggressively by proactively forgetting any precise marking in the
 * current state (which we inherited from the parent state), right before we
 * checkpoint it and branch off into new child state. This is done by
 * mark_all_scalars_imprecise() to hopefully get more permissive and generic
 * finalized states which help in short circuiting more future states.
 */
int bpf_mark_chain_precision(struct bpf_verifier_env *env,
			     struct bpf_verifier_state *starting_state,
			     int regno,
			     bool *changed)
{
	struct bpf_verifier_state *st = starting_state;
	struct backtrack_state *bt = &env->bt;
	int first_idx = st->first_insn_idx;
	int last_idx = starting_state->insn_idx;
	int subseq_idx = -1;
	struct bpf_func_state *func;
	bool tmp, skip_first = true;
	struct bpf_reg_state *reg;
	int i, fr, err;

	if (!env->bpf_capable)
		return 0;

	changed = changed ?: &tmp;
	/* set frame number from which we are starting to backtrack */
	bt_init(bt, starting_state->curframe);

	/* Do sanity checks against current state of register and/or stack
	 * slot, but don't set precise flag in current state, as precision
	 * tracking in the current state is unnecessary.
	 */
	func = st->frame[bt->frame];
	if (regno >= 0) {
		reg = &func->regs[regno];
		if (reg->type != SCALAR_VALUE) {
			verifier_bug(env, "backtracking misuse");
			return -EFAULT;
		}
		bt_set_reg(bt, regno);
	}

	if (bt_empty(bt))
		return 0;

	for (;;) {
		DECLARE_BITMAP(mask, 64);
		u32 history = st->jmp_history_cnt;
		struct bpf_jmp_history_entry *hist;

		if (env->log.level & BPF_LOG_LEVEL2) {
			verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
				bt->frame, last_idx, first_idx, subseq_idx);
		}

		if (last_idx < 0) {
			/* we are at the entry into subprog, which
			 * is expected for global funcs, but only if
			 * requested precise registers are R1-R5
			 * (which are global func's input arguments)
			 */
			if (st->curframe == 0 &&
			    st->frame[0]->subprogno > 0 &&
			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
			    bt_stack_mask(bt) == 0 &&
			    (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
				bitmap_from_u64(mask, bt_reg_mask(bt));
				for_each_set_bit(i, mask, 32) {
					reg = &st->frame[0]->regs[i];
					bt_clear_reg(bt, i);
					if (reg->type == SCALAR_VALUE) {
						reg->precise = true;
						*changed = true;
					}
				}
				return 0;
			}

			verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx",
				     st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
			return -EFAULT;
		}

		for (i = last_idx;;) {
			if (skip_first) {
				err = 0;
				skip_first = false;
			} else {
				hist = get_jmp_hist_entry(st, history, i);
				err = backtrack_insn(env, i, subseq_idx, hist, bt);
			}
			if (err == -ENOTSUPP) {
				bpf_mark_all_scalars_precise(env, starting_state);
				bt_reset(bt);
				return 0;
			} else if (err) {
				return err;
			}
			if (bt_empty(bt))
				/* Found assignment(s) into tracked register in this state.
				 * Since this state is already marked, just return.
				 * Nothing to be tracked further in the parent state.
				 */
				return 0;
			subseq_idx = i;
			i = get_prev_insn_idx(st, i, &history);
			if (i == -ENOENT)
				break;
			if (i >= env->prog->len) {
				/* This can happen if backtracking reached insn 0
				 * and there are still reg_mask or stack_mask
				 * to backtrack.
				 * It means the backtracking missed the spot where
				 * particular register was initialized with a constant.
				 */
				verifier_bug(env, "backtracking idx %d", i);
				return -EFAULT;
			}
		}
		st = st->parent;
		if (!st)
			break;

		for (fr = bt->frame; fr >= 0; fr--) {
			func = st->frame[fr];
			bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
			for_each_set_bit(i, mask, 32) {
				reg = &func->regs[i];
				if (reg->type != SCALAR_VALUE) {
					bt_clear_frame_reg(bt, fr, i);
					continue;
				}
				if (reg->precise) {
					bt_clear_frame_reg(bt, fr, i);
				} else {
					reg->precise = true;
					*changed = true;
				}
			}

			bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
			for_each_set_bit(i, mask, 64) {
				if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE,
						    env, "stack slot %d, total slots %d",
						    i, func->allocated_stack / BPF_REG_SIZE))
					return -EFAULT;

				if (!bpf_is_spilled_scalar_reg(&func->stack[i])) {
					bt_clear_frame_slot(bt, fr, i);
					continue;
				}
				reg = &func->stack[i].spilled_ptr;
				if (reg->precise) {
					bt_clear_frame_slot(bt, fr, i);
				} else {
					reg->precise = true;
					*changed = true;
				}
			}
			if (env->log.level & BPF_LOG_LEVEL2) {
				fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
					     bt_frame_reg_mask(bt, fr));
				verbose(env, "mark_precise: frame%d: parent state regs=%s ",
					fr, env->tmp_str_buf);
				bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
						   bt_frame_stack_mask(bt, fr));
				verbose(env, "stack=%s: ", env->tmp_str_buf);
				print_verifier_state(env, st, fr, true);
			}
		}

		if (bt_empty(bt))
			return 0;

		subseq_idx = first_idx;
		last_idx = st->last_insn_idx;
		first_idx = st->first_insn_idx;
	}

	/* if we still have requested precise regs or slots, we missed
	 * something (e.g., stack access through non-r10 register), so
	 * fallback to marking all precise
	 */
	if (!bt_empty(bt)) {
		bpf_mark_all_scalars_precise(env, starting_state);
		bt_reset(bt);
	}

	return 0;
}
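
/* A minimal usage sketch (hypothetical call site, for illustration only):
 * when the verifier consumes a scalar in a precision-sensitive context,
 * e.g. a helper argument that must be a known constant, the caller would do
 * something like
 *
 *	err = bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
 *	if (err)
 *		return err;
 *
 * passing NULL for @changed when it doesn't care whether any new precision
 * marks were added.
 */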