GitHub Repository: torvalds/linux
Path: blob/master/arch/mips/net/bpf_jit_comp32.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Just-In-Time compiler for eBPF bytecode on MIPS.
4
* Implementation of JIT functions for 32-bit CPUs.
5
*
6
* Copyright (c) 2021 Anyfi Networks AB.
7
* Author: Johan Almbladh <[email protected]>
8
*
9
* Based on code and ideas from
10
* Copyright (c) 2017 Cavium, Inc.
11
* Copyright (c) 2017 Shubham Bansal <[email protected]>
12
* Copyright (c) 2011 Mircea Gherzan <[email protected]>
13
*/
14
15
#include <linux/math64.h>
16
#include <linux/errno.h>
17
#include <linux/filter.h>
18
#include <linux/bpf.h>
19
#include <asm/cpu-features.h>
20
#include <asm/isa-rev.h>
21
#include <asm/uasm.h>
22
23
#include "bpf_jit_comp.h"
24
25
/* MIPS a4-a7 are not available in the o32 ABI */
26
#undef MIPS_R_A4
27
#undef MIPS_R_A5
28
#undef MIPS_R_A6
29
#undef MIPS_R_A7
30
31
/* Stack is 8-byte aligned in o32 ABI */
32
#define MIPS_STACK_ALIGNMENT 8
33
34
/*
35
* The top 16 bytes of a stack frame are reserved for the callee in the O32 ABI.
36
* This corresponds to stack space for register arguments a0-a3.
37
*/
38
#define JIT_RESERVED_STACK 16
39
40
/* Temporary 64-bit register used by JIT */
41
#define JIT_REG_TMP MAX_BPF_JIT_REG
42
43
/*
44
* Number of prologue bytes to skip when doing a tail call.
45
* Tail call count (TCC) initialization (8 bytes) always, plus
46
* a0-to-R1 context assignment (4 bytes) if big endian.
47
*/
48
#ifdef __BIG_ENDIAN
49
#define JIT_TCALL_SKIP 12
50
#else
51
#define JIT_TCALL_SKIP 8
52
#endif
53
54
/* CPU registers holding the callee return value */
55
#define JIT_RETURN_REGS \
56
(BIT(MIPS_R_V0) | \
57
BIT(MIPS_R_V1))
58
59
/* CPU register arguments passed to callee directly */
60
#define JIT_ARG_REGS \
61
(BIT(MIPS_R_A0) | \
62
BIT(MIPS_R_A1) | \
63
BIT(MIPS_R_A2) | \
64
BIT(MIPS_R_A3))
65
66
/* CPU register arguments passed to callee on stack */
67
#define JIT_STACK_REGS \
68
(BIT(MIPS_R_T0) | \
69
BIT(MIPS_R_T1) | \
70
BIT(MIPS_R_T2) | \
71
BIT(MIPS_R_T3) | \
72
BIT(MIPS_R_T4) | \
73
BIT(MIPS_R_T5))
74
75
/* Caller-saved CPU registers */
76
#define JIT_CALLER_REGS \
77
(JIT_RETURN_REGS | \
78
JIT_ARG_REGS | \
79
JIT_STACK_REGS)
80
81
/* Callee-saved CPU registers */
82
#define JIT_CALLEE_REGS \
83
(BIT(MIPS_R_S0) | \
84
BIT(MIPS_R_S1) | \
85
BIT(MIPS_R_S2) | \
86
BIT(MIPS_R_S3) | \
87
BIT(MIPS_R_S4) | \
88
BIT(MIPS_R_S5) | \
89
BIT(MIPS_R_S6) | \
90
BIT(MIPS_R_S7) | \
91
BIT(MIPS_R_GP) | \
92
BIT(MIPS_R_FP) | \
93
BIT(MIPS_R_RA))
94
95
/*
96
* Mapping of 64-bit eBPF registers to 32-bit native MIPS registers.
97
*
98
* 1) Native register pairs are ordered according to CPU endianness, following
99
* the MIPS convention for passing 64-bit arguments and return values.
100
* 2) The eBPF return value, arguments and callee-saved registers are mapped
101
* to their native MIPS equivalents.
102
* 3) Since the 32 highest bits in the eBPF FP register are always zero,
103
* only one general-purpose register is actually needed for the mapping.
104
* We use the fp register for this purpose, and map the highest bits to
105
* the MIPS register r0 (zero).
106
* 4) We use the MIPS gp and at registers as internal temporary registers
107
* for constant blinding. The gp register is callee-saved.
108
* 5) One 64-bit temporary register is mapped for use when sign-extending
109
* immediate operands. MIPS registers t6-t9 are available to the JIT
110
* for use as temporaries when implementing complex 64-bit operations.
111
*
112
* With this scheme, all eBPF registers are mapped to native MIPS
113
* registers without having to use any stack scratch space. The direct
114
* register mapping (2) simplifies the handling of function calls.
115
*/
116
static const u8 bpf2mips32[][2] = {
117
/* Return value from in-kernel function, and exit value from eBPF */
118
[BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0},
119
/* Arguments from eBPF program to in-kernel function */
120
[BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0},
121
[BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2},
122
/* Remaining arguments, to be passed on the stack per O32 ABI */
123
[BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0},
124
[BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2},
125
[BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4},
126
/* Callee-saved registers that in-kernel function will preserve */
127
[BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0},
128
[BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2},
129
[BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4},
130
[BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6},
131
/* Read-only frame pointer to access the eBPF stack */
132
#ifdef __BIG_ENDIAN
133
[BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO},
134
#else
135
[BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP},
136
#endif
137
/* Temporary register for blinding constants */
138
[BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT},
139
/* Temporary register for internal JIT use */
140
[JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6},
141
};
142
143
/* Get low CPU register for a 64-bit eBPF register mapping */
144
static inline u8 lo(const u8 reg[])
145
{
146
#ifdef __BIG_ENDIAN
147
return reg[0];
148
#else
149
return reg[1];
150
#endif
151
}
152
153
/* Get high CPU register for a 64-bit eBPF register mapping */
154
static inline u8 hi(const u8 reg[])
155
{
156
#ifdef __BIG_ENDIAN
157
return reg[1];
158
#else
159
return reg[0];
160
#endif
161
}
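/*
 * Illustration: BPF_REG_0 is mapped to {MIPS_R_V1, MIPS_R_V0}, so lo()
 * yields v0 and hi() yields v1 on little-endian CPUs, while big-endian
 * CPUs get v1 and v0 respectively, matching the o32 convention for
 * 64-bit values in register pairs.
 */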
162
163
/*
164
* Mark a 64-bit CPU register pair as clobbered, it needs to be
165
* saved/restored by the program if callee-saved.
166
*/
167
static void clobber_reg64(struct jit_context *ctx, const u8 reg[])
168
{
169
clobber_reg(ctx, reg[0]);
170
clobber_reg(ctx, reg[1]);
171
}
172
173
/* dst = imm (sign-extended) */
174
static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
175
{
176
emit_mov_i(ctx, lo(dst), imm);
177
if (imm < 0)
178
emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
179
else
180
emit(ctx, move, hi(dst), MIPS_R_ZERO);
181
clobber_reg64(ctx, dst);
182
}
183
184
/* Zero extension, if verifier does not do it for us */
185
static void emit_zext_ver(struct jit_context *ctx, const u8 dst[])
186
{
187
if (!ctx->program->aux->verifier_zext) {
188
emit(ctx, move, hi(dst), MIPS_R_ZERO);
189
clobber_reg(ctx, hi(dst));
190
}
191
}
192
193
/* Load delay slot, if ISA mandates it */
194
static void emit_load_delay(struct jit_context *ctx)
195
{
196
if (!cpu_has_mips_2_3_4_5_r)
197
emit(ctx, nop);
198
}
199
200
/* ALU immediate operation (64-bit) */
201
static void emit_alu_i64(struct jit_context *ctx,
202
const u8 dst[], s32 imm, u8 op)
203
{
204
u8 src = MIPS_R_T6;
205
206
/*
207
* ADD/SUB with all but the most negative imm can be handled by
208
* inverting the operation and the imm value, saving one insn.
209
*/
210
if (imm > S32_MIN && imm < 0)
211
switch (op) {
212
case BPF_ADD:
213
op = BPF_SUB;
214
imm = -imm;
215
break;
216
case BPF_SUB:
217
op = BPF_ADD;
218
imm = -imm;
219
break;
220
}
221
222
/* Move immediate to temporary register */
223
emit_mov_i(ctx, src, imm);
224
225
switch (op) {
226
/* dst = dst + imm */
227
case BPF_ADD:
228
emit(ctx, addu, lo(dst), lo(dst), src);
229
emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
230
emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
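/*
 * Carry illustration: with lo(dst) = 0xffffffff and imm = 1, the
 * low-word addu wraps to 0, the sltu then reads 0 < 1 = 1, and that
 * carry is added into hi(dst) above. For a negative imm, the addiu
 * below additionally folds in the 0xffffffff high word of the
 * sign-extended immediate.
 */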
231
if (imm < 0)
232
emit(ctx, addiu, hi(dst), hi(dst), -1);
233
break;
234
/* dst = dst - imm */
235
case BPF_SUB:
236
emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
237
emit(ctx, subu, lo(dst), lo(dst), src);
238
emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
239
if (imm < 0)
240
emit(ctx, addiu, hi(dst), hi(dst), 1);
241
break;
242
/* dst = dst | imm */
243
case BPF_OR:
244
emit(ctx, or, lo(dst), lo(dst), src);
245
if (imm < 0)
246
emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
247
break;
248
/* dst = dst & imm */
249
case BPF_AND:
250
emit(ctx, and, lo(dst), lo(dst), src);
251
if (imm >= 0)
252
emit(ctx, move, hi(dst), MIPS_R_ZERO);
253
break;
254
/* dst = dst ^ imm */
255
case BPF_XOR:
256
emit(ctx, xor, lo(dst), lo(dst), src);
257
if (imm < 0) {
258
emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
259
emit(ctx, addiu, hi(dst), hi(dst), -1);
260
}
261
break;
262
}
263
clobber_reg64(ctx, dst);
264
}
265
266
/* ALU register operation (64-bit) */
267
static void emit_alu_r64(struct jit_context *ctx,
268
const u8 dst[], const u8 src[], u8 op)
269
{
270
switch (BPF_OP(op)) {
271
/* dst = dst + src */
272
case BPF_ADD:
273
if (src == dst) {
274
emit(ctx, srl, MIPS_R_T9, lo(dst), 31);
275
emit(ctx, addu, lo(dst), lo(dst), lo(dst));
276
} else {
277
emit(ctx, addu, lo(dst), lo(dst), lo(src));
278
emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
279
}
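/*
 * When src and dst alias, the addu overwrites lo(src) before any sltu
 * comparison could observe it, so the carry is taken from bit 31 of
 * the original low word (the srl above) instead.
 */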
280
emit(ctx, addu, hi(dst), hi(dst), hi(src));
281
emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
282
break;
283
/* dst = dst - src */
284
case BPF_SUB:
285
emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
286
emit(ctx, subu, lo(dst), lo(dst), lo(src));
287
emit(ctx, subu, hi(dst), hi(dst), hi(src));
288
emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
289
break;
290
/* dst = dst | src */
291
case BPF_OR:
292
emit(ctx, or, lo(dst), lo(dst), lo(src));
293
emit(ctx, or, hi(dst), hi(dst), hi(src));
294
break;
295
/* dst = dst & src */
296
case BPF_AND:
297
emit(ctx, and, lo(dst), lo(dst), lo(src));
298
emit(ctx, and, hi(dst), hi(dst), hi(src));
299
break;
300
/* dst = dst ^ src */
301
case BPF_XOR:
302
emit(ctx, xor, lo(dst), lo(dst), lo(src));
303
emit(ctx, xor, hi(dst), hi(dst), hi(src));
304
break;
305
}
306
clobber_reg64(ctx, dst);
307
}
308
309
/* ALU invert (64-bit) */
310
static void emit_neg_i64(struct jit_context *ctx, const u8 dst[])
311
{
312
emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst));
313
emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst));
314
emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
315
emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
316
317
clobber_reg64(ctx, dst);
318
}
319
320
/* ALU shift immediate (64-bit) */
321
static void emit_shift_i64(struct jit_context *ctx,
322
const u8 dst[], u32 imm, u8 op)
323
{
324
switch (BPF_OP(op)) {
325
/* dst = dst << imm */
326
case BPF_LSH:
327
if (imm < 32) {
328
emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm);
329
emit(ctx, sll, lo(dst), lo(dst), imm);
330
emit(ctx, sll, hi(dst), hi(dst), imm);
331
emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9);
332
} else {
333
emit(ctx, sll, hi(dst), lo(dst), imm - 32);
334
emit(ctx, move, lo(dst), MIPS_R_ZERO);
335
}
336
break;
337
/* dst = dst >> imm */
338
case BPF_RSH:
339
if (imm < 32) {
340
emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
341
emit(ctx, srl, lo(dst), lo(dst), imm);
342
emit(ctx, srl, hi(dst), hi(dst), imm);
343
emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
344
} else {
345
emit(ctx, srl, lo(dst), hi(dst), imm - 32);
346
emit(ctx, move, hi(dst), MIPS_R_ZERO);
347
}
348
break;
349
/* dst = dst >> imm (arithmetic) */
350
case BPF_ARSH:
351
if (imm < 32) {
352
emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
353
emit(ctx, srl, lo(dst), lo(dst), imm);
354
emit(ctx, sra, hi(dst), hi(dst), imm);
355
emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
356
} else {
357
emit(ctx, sra, lo(dst), hi(dst), imm - 32);
358
emit(ctx, sra, hi(dst), hi(dst), 31);
359
}
360
break;
361
}
362
clobber_reg64(ctx, dst);
363
}
364
365
/* ALU shift register (64-bit) */
366
static void emit_shift_r64(struct jit_context *ctx,
367
const u8 dst[], u8 src, u8 op)
368
{
369
u8 t1 = MIPS_R_T8;
370
u8 t2 = MIPS_R_T9;
371
372
emit(ctx, andi, t1, src, 32); /* t1 = src & 32 */
373
emit(ctx, beqz, t1, 16); /* PC += 16 if t1 == 0 */
374
emit(ctx, nor, t2, src, MIPS_R_ZERO); /* t2 = ~src (delay slot) */
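/*
 * Example: a 64-bit left shift by 40 takes the "shift >= 32" path
 * below (40 & 32 is non-zero): dh = dl << (40 & 31) = dl << 8 and
 * dl = 0, since sllv/srlv/srav use only the low five bits of the
 * shift amount.
 */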
375
376
switch (BPF_OP(op)) {
377
/* dst = dst << src */
378
case BPF_LSH:
379
/* Next: shift >= 32 */
380
emit(ctx, sllv, hi(dst), lo(dst), src); /* dh = dl << src */
381
emit(ctx, move, lo(dst), MIPS_R_ZERO); /* dl = 0 */
382
emit(ctx, b, 20); /* PC += 20 */
383
/* +16: shift < 32 */
384
emit(ctx, srl, t1, lo(dst), 1); /* t1 = dl >> 1 */
385
emit(ctx, srlv, t1, t1, t2); /* t1 = t1 >> t2 */
386
emit(ctx, sllv, lo(dst), lo(dst), src); /* dl = dl << src */
387
emit(ctx, sllv, hi(dst), hi(dst), src); /* dh = dh << src */
388
emit(ctx, or, hi(dst), hi(dst), t1); /* dh = dh | t1 */
389
break;
390
/* dst = dst >> src */
391
case BPF_RSH:
392
/* Next: shift >= 32 */
393
emit(ctx, srlv, lo(dst), hi(dst), src); /* dl = dh >> src */
394
emit(ctx, move, hi(dst), MIPS_R_ZERO); /* dh = 0 */
395
emit(ctx, b, 20); /* PC += 20 */
396
/* +16: shift < 32 */
397
emit(ctx, sll, t1, hi(dst), 1); /* t1 = dh << 1 */
398
emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */
399
emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >> src */
400
emit(ctx, srlv, hi(dst), hi(dst), src); /* dh = dh >> src */
401
emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */
402
break;
403
/* dst = dst >> src (arithmetic) */
404
case BPF_ARSH:
405
/* Next: shift >= 32 */
406
emit(ctx, srav, lo(dst), hi(dst), src); /* dl = dh >>a src */
407
emit(ctx, sra, hi(dst), hi(dst), 31); /* dh = dh >>a 31 */
408
emit(ctx, b, 20); /* PC += 20 */
409
/* +16: shift < 32 */
410
emit(ctx, sll, t1, hi(dst), 1); /* t1 = dh << 1 */
411
emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */
412
emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >> src */
413
emit(ctx, srav, hi(dst), hi(dst), src); /* dh = dh >>a src */
414
emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */
415
break;
416
}
417
418
/* +20: Done */
419
clobber_reg64(ctx, dst);
420
}
421
422
/* ALU mul immediate (64x32-bit) */
423
static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
424
{
425
u8 src = MIPS_R_T6;
426
u8 tmp = MIPS_R_T9;
427
428
switch (imm) {
429
/* dst = dst * 1 is a no-op */
430
case 1:
431
break;
432
/* dst = dst * -1 */
433
case -1:
434
emit_neg_i64(ctx, dst);
435
break;
436
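/* dst = dst * 0 */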
case 0:
437
emit_mov_r(ctx, lo(dst), MIPS_R_ZERO);
438
emit_mov_r(ctx, hi(dst), MIPS_R_ZERO);
439
break;
440
/* Full 64x32 multiply */
441
default:
442
/* hi(dst) = hi(dst) * src(imm) */
443
emit_mov_i(ctx, src, imm);
444
if (cpu_has_mips32r1 || cpu_has_mips32r6) {
445
emit(ctx, mul, hi(dst), hi(dst), src);
446
} else {
447
emit(ctx, multu, hi(dst), src);
448
emit(ctx, mflo, hi(dst));
449
}
450
451
/* hi(dst) = hi(dst) - lo(dst) */
452
if (imm < 0)
453
emit(ctx, subu, hi(dst), hi(dst), lo(dst));
454
455
/* tmp = lo(dst) * src(imm) >> 32 */
456
/* lo(dst) = lo(dst) * src(imm) */
457
if (cpu_has_mips32r6) {
458
emit(ctx, muhu, tmp, lo(dst), src);
459
emit(ctx, mulu, lo(dst), lo(dst), src);
460
} else {
461
emit(ctx, multu, lo(dst), src);
462
emit(ctx, mflo, lo(dst));
463
emit(ctx, mfhi, tmp);
464
}
465
466
/* hi(dst) += tmp */
467
emit(ctx, addu, hi(dst), hi(dst), tmp);
468
clobber_reg64(ctx, dst);
469
break;
470
}
471
}
472
473
/* ALU mul register (64x64-bit) */
474
static void emit_mul_r64(struct jit_context *ctx,
475
const u8 dst[], const u8 src[])
476
{
477
u8 acc = MIPS_R_T8;
478
u8 tmp = MIPS_R_T9;
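/*
 * Schoolbook decomposition: with dst = (a1:a0) and src = (b1:b0), the
 * low result word is lo32(a0 * b0) and the high result word is
 * hi32(a0 * b0) + lo32(a1 * b0) + lo32(a0 * b1); the a1 * b1 term is
 * shifted out of the 64-bit result entirely.
 */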
479
480
/* acc = hi(dst) * lo(src) */
481
if (cpu_has_mips32r1 || cpu_has_mips32r6) {
482
emit(ctx, mul, acc, hi(dst), lo(src));
483
} else {
484
emit(ctx, multu, hi(dst), lo(src));
485
emit(ctx, mflo, acc);
486
}
487
488
/* tmp = lo(dst) * hi(src) */
489
if (cpu_has_mips32r1 || cpu_has_mips32r6) {
490
emit(ctx, mul, tmp, lo(dst), hi(src));
491
} else {
492
emit(ctx, multu, lo(dst), hi(src));
493
emit(ctx, mflo, tmp);
494
}
495
496
/* acc += tmp */
497
emit(ctx, addu, acc, acc, tmp);
498
499
/* tmp = lo(dst) * lo(src) >> 32 */
500
/* lo(dst) = lo(dst) * lo(src) */
501
if (cpu_has_mips32r6) {
502
emit(ctx, muhu, tmp, lo(dst), lo(src));
503
emit(ctx, mulu, lo(dst), lo(dst), lo(src));
504
} else {
505
emit(ctx, multu, lo(dst), lo(src));
506
emit(ctx, mflo, lo(dst));
507
emit(ctx, mfhi, tmp);
508
}
509
510
/* hi(dst) = acc + tmp */
511
emit(ctx, addu, hi(dst), acc, tmp);
512
clobber_reg64(ctx, dst);
513
}
514
515
/* Helper function for 64-bit modulo */
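/*
 * A wrapper is used because div64_u64_rem() returns the remainder
 * through a pointer, while the emitted call sequence expects it in
 * the return register pair.
 */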
516
static u64 jit_mod64(u64 a, u64 b)
517
{
518
u64 rem;
519
520
div64_u64_rem(a, b, &rem);
521
return rem;
522
}
523
524
/* ALU div/mod register (64-bit) */
525
static void emit_divmod_r64(struct jit_context *ctx,
526
const u8 dst[], const u8 src[], u8 op)
527
{
528
const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
529
const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
530
const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */
531
int exclude, k;
532
u32 addr = 0;
533
534
/* Push caller-saved registers on stack */
535
push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
536
0, JIT_RESERVED_STACK);
537
538
/* Put 64-bit arguments 1 and 2 in registers a0-a3 */
539
for (k = 0; k < 2; k++) {
540
emit(ctx, move, MIPS_R_T9, src[k]);
541
emit(ctx, move, r1[k], dst[k]);
542
emit(ctx, move, r2[k], MIPS_R_T9);
543
}
544
545
/* Emit function call */
546
switch (BPF_OP(op)) {
547
/* dst = dst / src */
548
case BPF_DIV:
549
addr = (u32)&div64_u64;
550
break;
551
/* dst = dst % src */
552
case BPF_MOD:
553
addr = (u32)&jit_mod64;
554
break;
555
}
556
emit_mov_i(ctx, MIPS_R_T9, addr);
557
emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
558
emit(ctx, nop); /* Delay slot */
559
560
/* Store the 64-bit result in dst */
561
emit(ctx, move, dst[0], r0[0]);
562
emit(ctx, move, dst[1], r0[1]);
563
564
/* Restore caller-saved registers, excluding the computed result */
565
exclude = BIT(lo(dst)) | BIT(hi(dst));
566
pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
567
exclude, JIT_RESERVED_STACK);
568
emit_load_delay(ctx);
569
570
clobber_reg64(ctx, dst);
571
clobber_reg(ctx, MIPS_R_V0);
572
clobber_reg(ctx, MIPS_R_V1);
573
clobber_reg(ctx, MIPS_R_RA);
574
}
575
576
/* Swap bytes in a register word */
577
static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask)
578
{
579
u8 tmp = MIPS_R_T9;
580
581
emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */
582
emit(ctx, sll, tmp, tmp, 8); /* tmp = tmp << 8 */
583
emit(ctx, srl, dst, src, 8); /* dst = src >> 8 */
584
emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */
585
emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */
586
}
587
588
/* Swap half words in a register word */
589
static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src)
590
{
591
u8 tmp = MIPS_R_T9;
592
593
emit(ctx, sll, tmp, src, 16); /* tmp = src << 16 */
594
emit(ctx, srl, dst, src, 16); /* dst = src >> 16 */
595
emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */
596
}
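/*
 * Illustration: byte-swapping the word 0x11223344 without wsbh is done
 * as emit_swap16_r (0x11223344 -> 0x33441122) followed by emit_swap8_r
 * with mask 0x00ff00ff (0x33441122 -> 0x44332211).
 */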
597
598
/* Swap bytes and truncate a register double word, word or half word */
599
static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width)
600
{
601
u8 tmp = MIPS_R_T8;
602
603
switch (width) {
604
/* Swap bytes in a double word */
605
case 64:
606
if (cpu_has_mips32r2 || cpu_has_mips32r6) {
607
emit(ctx, rotr, tmp, hi(dst), 16);
608
emit(ctx, rotr, hi(dst), lo(dst), 16);
609
emit(ctx, wsbh, lo(dst), tmp);
610
emit(ctx, wsbh, hi(dst), hi(dst));
611
} else {
612
emit_swap16_r(ctx, tmp, lo(dst));
613
emit_swap16_r(ctx, lo(dst), hi(dst));
614
emit(ctx, move, hi(dst), tmp);
615
616
emit(ctx, lui, tmp, 0xff); /* tmp = 0x00ff0000 */
617
emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */
618
emit_swap8_r(ctx, lo(dst), lo(dst), tmp);
619
emit_swap8_r(ctx, hi(dst), hi(dst), tmp);
620
}
621
break;
622
/* Swap bytes in a word */
623
/* Swap bytes in a half word */
624
case 32:
625
case 16:
626
emit_bswap_r(ctx, lo(dst), width);
627
emit(ctx, move, hi(dst), MIPS_R_ZERO);
628
break;
629
}
630
clobber_reg64(ctx, dst);
631
}
632
633
/* Truncate a register double word, word or half word */
634
static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width)
635
{
636
switch (width) {
637
case 64:
638
break;
639
/* Zero-extend a word */
640
case 32:
641
emit(ctx, move, hi(dst), MIPS_R_ZERO);
642
clobber_reg(ctx, hi(dst));
643
break;
644
/* Zero-extend a half word */
645
case 16:
646
emit(ctx, move, hi(dst), MIPS_R_ZERO);
647
emit(ctx, andi, lo(dst), lo(dst), 0xffff);
648
clobber_reg64(ctx, dst);
649
break;
650
}
651
}
652
653
/* Load operation: dst = *(size*)(src + off) */
654
static void emit_ldx(struct jit_context *ctx,
655
const u8 dst[], u8 src, s16 off, u8 size)
656
{
657
switch (size) {
658
/* Load a byte */
659
case BPF_B:
660
emit(ctx, lbu, lo(dst), off, src);
661
emit(ctx, move, hi(dst), MIPS_R_ZERO);
662
break;
663
/* Load a half word */
664
case BPF_H:
665
emit(ctx, lhu, lo(dst), off, src);
666
emit(ctx, move, hi(dst), MIPS_R_ZERO);
667
break;
668
/* Load a word */
669
case BPF_W:
670
emit(ctx, lw, lo(dst), off, src);
671
emit(ctx, move, hi(dst), MIPS_R_ZERO);
672
break;
673
/* Load a double word */
674
case BPF_DW:
675
if (dst[1] == src) {
676
emit(ctx, lw, dst[0], off + 4, src);
677
emit(ctx, lw, dst[1], off, src);
678
} else {
679
emit(ctx, lw, dst[1], off, src);
680
emit(ctx, lw, dst[0], off + 4, src);
681
}
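/*
 * The load order ensures that if one destination half aliases the
 * base register src, that half is loaded last, keeping the address
 * intact for the first load.
 */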
682
emit_load_delay(ctx);
683
break;
684
}
685
clobber_reg64(ctx, dst);
686
}
687
688
/* Store operation: *(size *)(dst + off) = src */
689
static void emit_stx(struct jit_context *ctx,
690
const u8 dst, const u8 src[], s16 off, u8 size)
691
{
692
switch (size) {
693
/* Store a byte */
694
case BPF_B:
695
emit(ctx, sb, lo(src), off, dst);
696
break;
697
/* Store a half word */
698
case BPF_H:
699
emit(ctx, sh, lo(src), off, dst);
700
break;
701
/* Store a word */
702
case BPF_W:
703
emit(ctx, sw, lo(src), off, dst);
704
break;
705
/* Store a double word */
706
case BPF_DW:
707
emit(ctx, sw, src[1], off, dst);
708
emit(ctx, sw, src[0], off + 4, dst);
709
break;
710
}
711
}
712
713
/* Atomic read-modify-write (32-bit, non-ll/sc fallback) */
714
static void emit_atomic_r32(struct jit_context *ctx,
715
u8 dst, u8 src, s16 off, u8 code)
716
{
717
u32 exclude = 0;
718
u32 addr = 0;
719
720
/* Push caller-saved registers on stack */
721
push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
722
0, JIT_RESERVED_STACK);
723
/*
724
* Argument 1: dst+off if xchg, otherwise src, passed in register a0
725
* Argument 2: src if xchg, otherwise dst+off, passed in register a1
726
*/
727
emit(ctx, move, MIPS_R_T9, dst);
728
if (code == BPF_XCHG) {
729
emit(ctx, move, MIPS_R_A1, src);
730
emit(ctx, addiu, MIPS_R_A0, MIPS_R_T9, off);
731
} else {
732
emit(ctx, move, MIPS_R_A0, src);
733
emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off);
734
}
735
736
/* Emit function call */
737
switch (code) {
738
case BPF_ADD:
739
addr = (u32)&atomic_add;
740
break;
741
case BPF_ADD | BPF_FETCH:
742
addr = (u32)&atomic_fetch_add;
743
break;
744
case BPF_SUB:
745
addr = (u32)&atomic_sub;
746
break;
747
case BPF_SUB | BPF_FETCH:
748
addr = (u32)&atomic_fetch_sub;
749
break;
750
case BPF_OR:
751
addr = (u32)&atomic_or;
752
break;
753
case BPF_OR | BPF_FETCH:
754
addr = (u32)&atomic_fetch_or;
755
break;
756
case BPF_AND:
757
addr = (u32)&atomic_and;
758
break;
759
case BPF_AND | BPF_FETCH:
760
addr = (u32)&atomic_fetch_and;
761
break;
762
case BPF_XOR:
763
addr = (u32)&atomic_xor;
764
break;
765
case BPF_XOR | BPF_FETCH:
766
addr = (u32)&atomic_fetch_xor;
767
break;
768
case BPF_XCHG:
769
addr = (u32)&atomic_xchg;
770
break;
771
}
772
emit_mov_i(ctx, MIPS_R_T9, addr);
773
emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
774
emit(ctx, nop); /* Delay slot */
775
776
/* Update src register with old value, if specified */
777
if (code & BPF_FETCH) {
778
emit(ctx, move, src, MIPS_R_V0);
779
exclude = BIT(src);
780
clobber_reg(ctx, src);
781
}
782
783
/* Restore caller-saved registers, except any fetched value */
784
pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
785
exclude, JIT_RESERVED_STACK);
786
emit_load_delay(ctx);
787
clobber_reg(ctx, MIPS_R_RA);
788
}
789
790
/* Helper function for 64-bit atomic exchange */
791
static s64 jit_xchg64(s64 a, atomic64_t *v)
792
{
793
return atomic64_xchg(v, a);
794
}
795
796
/* Atomic read-modify-write (64-bit) */
797
static void emit_atomic_r64(struct jit_context *ctx,
798
u8 dst, const u8 src[], s16 off, u8 code)
799
{
800
const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
801
const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
802
u32 exclude = 0;
803
u32 addr = 0;
804
805
/* Push caller-saved registers on stack */
806
push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
807
0, JIT_RESERVED_STACK);
808
/*
809
* Argument 1: 64-bit src, passed in registers a0-a1
810
* Argument 2: 32-bit dst+off, passed in register a2
811
*/
812
emit(ctx, move, MIPS_R_T9, dst);
813
emit(ctx, move, r1[0], src[0]);
814
emit(ctx, move, r1[1], src[1]);
815
emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off);
816
817
/* Emit function call */
818
switch (code) {
819
case BPF_ADD:
820
addr = (u32)&atomic64_add;
821
break;
822
case BPF_ADD | BPF_FETCH:
823
addr = (u32)&atomic64_fetch_add;
824
break;
825
case BPF_SUB:
826
addr = (u32)&atomic64_sub;
827
break;
828
case BPF_SUB | BPF_FETCH:
829
addr = (u32)&atomic64_fetch_sub;
830
break;
831
case BPF_OR:
832
addr = (u32)&atomic64_or;
833
break;
834
case BPF_OR | BPF_FETCH:
835
addr = (u32)&atomic64_fetch_or;
836
break;
837
case BPF_AND:
838
addr = (u32)&atomic64_and;
839
break;
840
case BPF_AND | BPF_FETCH:
841
addr = (u32)&atomic64_fetch_and;
842
break;
843
case BPF_XOR:
844
addr = (u32)&atomic64_xor;
845
break;
846
case BPF_XOR | BPF_FETCH:
847
addr = (u32)&atomic64_fetch_xor;
848
break;
849
case BPF_XCHG:
850
addr = (u32)&jit_xchg64;
851
break;
852
}
853
emit_mov_i(ctx, MIPS_R_T9, addr);
854
emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
855
emit(ctx, nop); /* Delay slot */
856
857
/* Update src register with old value, if specified */
858
if (code & BPF_FETCH) {
859
emit(ctx, move, lo(src), lo(r0));
860
emit(ctx, move, hi(src), hi(r0));
861
exclude = BIT(src[0]) | BIT(src[1]);
862
clobber_reg64(ctx, src);
863
}
864
865
/* Restore caller-saved registers, except any fetched value */
866
pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
867
exclude, JIT_RESERVED_STACK);
868
emit_load_delay(ctx);
869
clobber_reg(ctx, MIPS_R_RA);
870
}
871
872
/* Atomic compare-and-exchange (32-bit, non-ll/sc fallback) */
873
static void emit_cmpxchg_r32(struct jit_context *ctx, u8 dst, u8 src, s16 off)
874
{
875
const u8 *r0 = bpf2mips32[BPF_REG_0];
876
877
/* Push caller-saved registers on stack */
878
push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
879
JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
880
/*
881
* Argument 1: 32-bit dst+off, passed in register a0
882
* Argument 2: 32-bit r0, passed in register a1
883
* Argument 3: 32-bit src, passed in register a2
884
*/
885
emit(ctx, addiu, MIPS_R_T9, dst, off);
886
emit(ctx, move, MIPS_R_T8, src);
887
emit(ctx, move, MIPS_R_A1, lo(r0));
888
emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
889
emit(ctx, move, MIPS_R_A2, MIPS_R_T8);
890
891
/* Emit function call */
892
emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic_cmpxchg);
893
emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
894
emit(ctx, nop); /* Delay slot */
895
896
#ifdef __BIG_ENDIAN
897
emit(ctx, move, lo(r0), MIPS_R_V0);
898
#endif
899
/* Restore caller-saved registers, except the return value */
900
pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
901
JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
902
emit_load_delay(ctx);
903
clobber_reg(ctx, MIPS_R_V0);
904
clobber_reg(ctx, MIPS_R_V1);
905
clobber_reg(ctx, MIPS_R_RA);
906
}
907
908
/* Atomic compare-and-exchange (64-bit) */
909
static void emit_cmpxchg_r64(struct jit_context *ctx,
910
u8 dst, const u8 src[], s16 off)
911
{
912
const u8 *r0 = bpf2mips32[BPF_REG_0];
913
const u8 *r2 = bpf2mips32[BPF_REG_2];
914
915
/* Push caller-saved registers on stack */
916
push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
917
JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
918
/*
919
* Argument 1: 32-bit dst+off, passed in register a0 (a1 unused)
920
* Argument 2: 64-bit r0, passed in registers a2-a3
921
* Argument 3: 64-bit src, passed on stack
922
*/
923
push_regs(ctx, BIT(src[0]) | BIT(src[1]), 0, JIT_RESERVED_STACK);
924
emit(ctx, addiu, MIPS_R_T9, dst, off);
925
emit(ctx, move, r2[0], r0[0]);
926
emit(ctx, move, r2[1], r0[1]);
927
emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
928
929
/* Emit function call */
930
emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic64_cmpxchg);
931
emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
932
emit(ctx, nop); /* Delay slot */
933
934
/* Restore caller-saved registers, except the return value */
935
pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
936
JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
937
emit_load_delay(ctx);
938
clobber_reg(ctx, MIPS_R_V0);
939
clobber_reg(ctx, MIPS_R_V1);
940
clobber_reg(ctx, MIPS_R_RA);
941
}
942
943
/*
944
* Conditional movz or an emulated equivalent.
945
* Note that the rs register may be modified.
946
*/
947
static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
948
{
949
if (cpu_has_mips_2) {
950
emit(ctx, movz, rd, rs, rt); /* rd = rt ? rd : rs */
951
} else if (cpu_has_mips32r6) {
952
if (rs != MIPS_R_ZERO)
953
emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt != 0 */
954
emit(ctx, selnez, rd, rd, rt); /* rd = 0 if rt == 0 */
955
if (rs != MIPS_R_ZERO)
956
emit(ctx, or, rd, rd, rs); /* rd = rd | rs */
957
} else {
958
emit(ctx, bnez, rt, 8); /* PC += 8 if rt != 0 */
959
emit(ctx, nop); /* +0: delay slot */
960
emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */
961
}
962
clobber_reg(ctx, rd);
963
clobber_reg(ctx, rs);
964
}
965
966
/*
967
* Conditional movn or an emulated equivalent.
968
* Note that the rs register may be modified.
969
*/
970
static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
971
{
972
if (cpu_has_mips_2) {
973
emit(ctx, movn, rd, rs, rt); /* rd = rt ? rs : rd */
974
} else if (cpu_has_mips32r6) {
975
if (rs != MIPS_R_ZERO)
976
emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0 */
977
emit(ctx, seleqz, rd, rd, rt); /* rd = 0 if rt != 0 */
978
if (rs != MIPS_R_ZERO)
979
emit(ctx, or, rd, rd, rs); /* rd = rd | rs */
980
} else {
981
emit(ctx, beqz, rt, 8); /* PC += 8 if rt == 0 */
982
emit(ctx, nop); /* +0: delay slot */
983
emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */
984
}
985
clobber_reg(ctx, rd);
986
clobber_reg(ctx, rs);
987
}
988
989
/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */
990
static void emit_sltiu_r64(struct jit_context *ctx, u8 rd,
991
const u8 rs[], s64 imm)
992
{
993
u8 tmp = MIPS_R_T9;
994
995
if (imm < 0) {
996
emit_mov_i(ctx, rd, imm); /* rd = imm */
997
emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */
998
emit(ctx, sltiu, tmp, hi(rs), -1); /* tmp = rsh < ~0U */
999
emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */
1000
} else { /* imm >= 0 */
1001
if (imm > 0x7fff) {
1002
emit_mov_i(ctx, rd, (s32)imm); /* rd = imm */
1003
emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */
1004
} else {
1005
emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */
1006
}
1007
emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh */
1008
}
1009
}
1010
1011
/* Emulation of 64-bit sltu rd, rs, rt */
1012
static void emit_sltu_r64(struct jit_context *ctx, u8 rd,
1013
const u8 rs[], const u8 rt[])
1014
{
1015
u8 tmp = MIPS_R_T9;
1016
1017
emit(ctx, sltu, rd, lo(rs), lo(rt)); /* rd = rsl < rtl */
1018
emit(ctx, subu, tmp, hi(rs), hi(rt)); /* tmp = rsh - rth */
1019
emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp); /* rd = 0 if tmp != 0 */
1020
emit(ctx, sltu, tmp, hi(rs), hi(rt)); /* tmp = rsh < rth */
1021
emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */
1022
}
1023
1024
/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */
1025
static void emit_slti_r64(struct jit_context *ctx, u8 rd,
1026
const u8 rs[], s64 imm)
1027
{
1028
u8 t1 = MIPS_R_T8;
1029
u8 t2 = MIPS_R_T9;
1030
u8 cmp;
1031
1032
/*
1033
* if ((rs < 0) ^ (imm < 0)) t1 = imm >u rsl
1034
* else t1 = rsl <u imm
1035
*/
1036
emit_mov_i(ctx, rd, (s32)imm);
1037
emit(ctx, sltu, t1, lo(rs), rd); /* t1 = rsl <u imm */
1038
emit(ctx, sltu, t2, rd, lo(rs)); /* t2 = imm <u rsl */
1039
emit(ctx, srl, rd, hi(rs), 31); /* rd = rsh >> 31 */
1040
if (imm < 0)
1041
emit_movz_r(ctx, t1, t2, rd); /* t1 = rd ? t1 : t2 */
1042
else
1043
emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ? t2 : t1 */
1044
/*
1045
* if ((imm < 0 && rsh != 0xffffffff) ||
1046
* (imm >= 0 && rsh != 0))
1047
* t1 = 0
1048
*/
1049
if (imm < 0) {
1050
emit(ctx, addiu, rd, hi(rs), 1); /* rd = rsh + 1 */
1051
cmp = rd;
1052
} else { /* imm >= 0 */
1053
cmp = hi(rs);
1054
}
1055
emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp); /* t1 = 0 if cmp != 0 */
1056
1057
/*
1058
* if (imm < 0) rd = rsh < -1
1059
* else rd = rsh != 0
1060
* rd = rd | t1
1061
*/
1062
emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */
1063
emit(ctx, or, rd, rd, t1); /* rd = rd | t1 */
1064
}
1065
1066
/* Emulation of 64-bit slt rd, rs, rt */
1067
static void emit_slt_r64(struct jit_context *ctx, u8 rd,
1068
const u8 rs[], const u8 rt[])
1069
{
1070
u8 t1 = MIPS_R_T7;
1071
u8 t2 = MIPS_R_T8;
1072
u8 t3 = MIPS_R_T9;
1073
1074
/*
1075
* if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl
1076
* else t1 = rsl <u rtl
1077
* if (rsh == rth) t1 = 0
1078
*/
1079
emit(ctx, sltu, t1, lo(rs), lo(rt)); /* t1 = rsl <u rtl */
1080
emit(ctx, sltu, t2, lo(rt), lo(rs)); /* t2 = rtl <u rsl */
1081
emit(ctx, xor, t3, hi(rs), hi(rt)); /* t3 = rsh ^ rth */
1082
emit(ctx, srl, rd, t3, 31); /* rd = t3 >> 31 */
1083
emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ? t2 : t1 */
1084
emit_movn_r(ctx, t1, MIPS_R_ZERO, t3); /* t1 = 0 if t3 != 0 */
1085
1086
/* rd = (rsh < rth) | t1 */
1087
emit(ctx, slt, rd, hi(rs), hi(rt)); /* rd = rsh <s rth */
1088
emit(ctx, or, rd, rd, t1); /* rd = rd | t1 */
1089
}
1090
1091
/* Jump immediate (64-bit) */
1092
static void emit_jmp_i64(struct jit_context *ctx,
1093
const u8 dst[], s32 imm, s32 off, u8 op)
1094
{
1095
u8 tmp = MIPS_R_T6;
1096
1097
switch (op) {
1098
/* No-op, used internally for branch optimization */
1099
case JIT_JNOP:
1100
break;
1101
/* PC += off if dst == imm */
1102
/* PC += off if dst != imm */
1103
case BPF_JEQ:
1104
case BPF_JNE:
1105
if (imm >= -0x7fff && imm <= 0x8000) {
1106
emit(ctx, addiu, tmp, lo(dst), -imm);
1107
} else if ((u32)imm <= 0xffff) {
1108
emit(ctx, xori, tmp, lo(dst), imm);
1109
} else { /* Register fallback */
1110
emit_mov_i(ctx, tmp, imm);
1111
emit(ctx, xor, tmp, lo(dst), tmp);
1112
}
1113
if (imm < 0) { /* Compare sign extension */
1114
emit(ctx, addiu, MIPS_R_T9, hi(dst), 1);
1115
emit(ctx, or, tmp, tmp, MIPS_R_T9);
1116
} else { /* Compare zero extension */
1117
emit(ctx, or, tmp, tmp, hi(dst));
1118
}
1119
if (op == BPF_JEQ)
1120
emit(ctx, beqz, tmp, off);
1121
else /* BPF_JNE */
1122
emit(ctx, bnez, tmp, off);
1123
break;
1124
/* PC += off if dst & imm */
1125
/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
1126
case BPF_JSET:
1127
case JIT_JNSET:
1128
if ((u32)imm <= 0xffff) {
1129
emit(ctx, andi, tmp, lo(dst), imm);
1130
} else { /* Register fallback */
1131
emit_mov_i(ctx, tmp, imm);
1132
emit(ctx, and, tmp, lo(dst), tmp);
1133
}
1134
if (imm < 0) /* Sign-extension pulls in high word */
1135
emit(ctx, or, tmp, tmp, hi(dst));
1136
if (op == BPF_JSET)
1137
emit(ctx, bnez, tmp, off);
1138
else /* JIT_JNSET */
1139
emit(ctx, beqz, tmp, off);
1140
break;
1141
/* PC += off if dst > imm */
1142
case BPF_JGT:
1143
emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
1144
emit(ctx, beqz, tmp, off);
1145
break;
1146
/* PC += off if dst >= imm */
1147
case BPF_JGE:
1148
emit_sltiu_r64(ctx, tmp, dst, imm);
1149
emit(ctx, beqz, tmp, off);
1150
break;
1151
/* PC += off if dst < imm */
1152
case BPF_JLT:
1153
emit_sltiu_r64(ctx, tmp, dst, imm);
1154
emit(ctx, bnez, tmp, off);
1155
break;
1156
/* PC += off if dst <= imm */
1157
case BPF_JLE:
1158
emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
1159
emit(ctx, bnez, tmp, off);
1160
break;
1161
/* PC += off if dst > imm (signed) */
1162
case BPF_JSGT:
1163
emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
1164
emit(ctx, beqz, tmp, off);
1165
break;
1166
/* PC += off if dst >= imm (signed) */
1167
case BPF_JSGE:
1168
emit_slti_r64(ctx, tmp, dst, imm);
1169
emit(ctx, beqz, tmp, off);
1170
break;
1171
/* PC += off if dst < imm (signed) */
1172
case BPF_JSLT:
1173
emit_slti_r64(ctx, tmp, dst, imm);
1174
emit(ctx, bnez, tmp, off);
1175
break;
1176
/* PC += off if dst <= imm (signed) */
1177
case BPF_JSLE:
1178
emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
1179
emit(ctx, bnez, tmp, off);
1180
break;
1181
}
1182
}
1183
1184
/* Jump register (64-bit) */
1185
static void emit_jmp_r64(struct jit_context *ctx,
1186
const u8 dst[], const u8 src[], s32 off, u8 op)
1187
{
1188
u8 t1 = MIPS_R_T6;
1189
u8 t2 = MIPS_R_T7;
1190
1191
switch (op) {
1192
/* No-op, used internally for branch optimization */
1193
case JIT_JNOP:
1194
break;
1195
/* PC += off if dst == src */
1196
/* PC += off if dst != src */
1197
case BPF_JEQ:
1198
case BPF_JNE:
1199
emit(ctx, subu, t1, lo(dst), lo(src));
1200
emit(ctx, subu, t2, hi(dst), hi(src));
1201
emit(ctx, or, t1, t1, t2);
1202
if (op == BPF_JEQ)
1203
emit(ctx, beqz, t1, off);
1204
else /* BPF_JNE */
1205
emit(ctx, bnez, t1, off);
1206
break;
1207
/* PC += off if dst & src */
1208
/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
1209
case BPF_JSET:
1210
case JIT_JNSET:
1211
emit(ctx, and, t1, lo(dst), lo(src));
1212
emit(ctx, and, t2, hi(dst), hi(src));
1213
emit(ctx, or, t1, t1, t2);
1214
if (op == BPF_JSET)
1215
emit(ctx, bnez, t1, off);
1216
else /* JIT_JNSET */
1217
emit(ctx, beqz, t1, off);
1218
break;
1219
/* PC += off if dst > src */
1220
case BPF_JGT:
1221
emit_sltu_r64(ctx, t1, src, dst);
1222
emit(ctx, bnez, t1, off);
1223
break;
1224
/* PC += off if dst >= src */
1225
case BPF_JGE:
1226
emit_sltu_r64(ctx, t1, dst, src);
1227
emit(ctx, beqz, t1, off);
1228
break;
1229
/* PC += off if dst < src */
1230
case BPF_JLT:
1231
emit_sltu_r64(ctx, t1, dst, src);
1232
emit(ctx, bnez, t1, off);
1233
break;
1234
/* PC += off if dst <= src */
1235
case BPF_JLE:
1236
emit_sltu_r64(ctx, t1, src, dst);
1237
emit(ctx, beqz, t1, off);
1238
break;
1239
/* PC += off if dst > src (signed) */
1240
case BPF_JSGT:
1241
emit_slt_r64(ctx, t1, src, dst);
1242
emit(ctx, bnez, t1, off);
1243
break;
1244
/* PC += off if dst >= src (signed) */
1245
case BPF_JSGE:
1246
emit_slt_r64(ctx, t1, dst, src);
1247
emit(ctx, beqz, t1, off);
1248
break;
1249
/* PC += off if dst < src (signed) */
1250
case BPF_JSLT:
1251
emit_slt_r64(ctx, t1, dst, src);
1252
emit(ctx, bnez, t1, off);
1253
break;
1254
/* PC += off if dst <= src (signed) */
1255
case BPF_JSLE:
1256
emit_slt_r64(ctx, t1, src, dst);
1257
emit(ctx, beqz, t1, off);
1258
break;
1259
}
1260
}
1261
1262
/* Function call */
1263
static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
1264
{
1265
bool fixed;
1266
u64 addr;
1267
1268
/* Decode the call address */
1269
if (bpf_jit_get_func_addr(ctx->program, insn, false,
1270
&addr, &fixed) < 0)
1271
return -1;
1272
if (!fixed)
1273
return -1;
1274
1275
/* Push stack arguments */
1276
push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK);
1277
1278
/* Emit function call */
1279
emit_mov_i(ctx, MIPS_R_T9, addr);
1280
emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
1281
emit(ctx, nop); /* Delay slot */
1282
1283
clobber_reg(ctx, MIPS_R_RA);
1284
clobber_reg(ctx, MIPS_R_V0);
1285
clobber_reg(ctx, MIPS_R_V1);
1286
return 0;
1287
}
1288
1289
/* Function tail call */
1290
static int emit_tail_call(struct jit_context *ctx)
1291
{
1292
u8 ary = lo(bpf2mips32[BPF_REG_2]);
1293
u8 ind = lo(bpf2mips32[BPF_REG_3]);
1294
u8 t1 = MIPS_R_T8;
1295
u8 t2 = MIPS_R_T9;
1296
int off;
1297
1298
/*
1299
* Tail call:
1300
* eBPF R1 - function argument (context ptr), passed in a0-a1
1301
* eBPF R2 - ptr to object with array of function entry points
1302
* eBPF R3 - array index of function to be called
1303
* stack[sz] - remaining tail call count, initialized in prologue
1304
*/
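/*
 * Roughly, the sequence emitted below behaves like:
 *
 *	if (ind >= ary->map.max_entries || tcc <= 0 ||
 *	    (prog = ary->ptrs[ind]) == NULL)
 *		goto out;
 *	tcc--;
 *	goto *(prog->bpf_func + JIT_TCALL_SKIP);
 */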
1305
1306
/* if (ind >= ary->map.max_entries) goto out */
1307
off = offsetof(struct bpf_array, map.max_entries);
1308
if (off > 0x7fff)
1309
return -1;
1310
emit(ctx, lw, t1, off, ary); /* t1 = ary->map.max_entries */
1311
emit_load_delay(ctx); /* Load delay slot */
1312
emit(ctx, sltu, t1, ind, t1); /* t1 = ind < t1 */
1313
emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0 */
1314
/* (next insn delay slot) */
1315
/* if (TCC-- <= 0) goto out */
1316
emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP); /* t2 = *(SP + size) */
1317
emit_load_delay(ctx); /* Load delay slot */
1318
emit(ctx, blez, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 <= 0 */
1319
emit(ctx, addiu, t2, t2, -1); /* t2-- (delay slot) */
1320
emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP); /* *(SP + size) = t2 */
1321
1322
/* prog = ary->ptrs[ind] */
1323
off = offsetof(struct bpf_array, ptrs);
1324
if (off > 0x7fff)
1325
return -1;
1326
emit(ctx, sll, t1, ind, 2); /* t1 = ind << 2 */
1327
emit(ctx, addu, t1, t1, ary); /* t1 += ary */
1328
emit(ctx, lw, t2, off, t1); /* t2 = *(t1 + off) */
1329
emit_load_delay(ctx); /* Load delay slot */
1330
1331
/* if (prog == 0) goto out */
1332
emit(ctx, beqz, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 == 0 */
1333
emit(ctx, nop); /* Delay slot */
1334
1335
/* func = prog->bpf_func + 8 (prologue skip offset) */
1336
off = offsetof(struct bpf_prog, bpf_func);
1337
if (off > 0x7fff)
1338
return -1;
1339
emit(ctx, lw, t1, off, t2); /* t1 = *(t2 + off) */
1340
emit_load_delay(ctx); /* Load delay slot */
1341
emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP); /* t1 += skip (8 or 12) */
1342
1343
/* goto func */
1344
build_epilogue(ctx, t1);
1345
return 0;
1346
}
1347
1348
/*
1349
* Stack frame layout for a JITed program (stack grows down).
1350
*
1351
* Higher address : Caller's stack frame :
1352
* :----------------------------:
1353
* : 64-bit eBPF args r3-r5 :
1354
* :----------------------------:
1355
* : Reserved / tail call count :
1356
* +============================+ <--- MIPS sp before call
1357
* | Callee-saved registers, |
1358
* | including RA and FP |
1359
* +----------------------------+ <--- eBPF FP (MIPS zero,fp)
1360
* | Local eBPF variables |
1361
* | allocated by program |
1362
* +----------------------------+
1363
* | Reserved for caller-saved |
1364
* | registers |
1365
* +----------------------------+
1366
* | Reserved for 64-bit eBPF |
1367
* | args r3-r5 & args passed |
1368
* | on stack in kernel calls |
1369
* Lower address +============================+ <--- MIPS sp
1370
*/
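/*
 * Illustrative sizing (made-up numbers): with five clobbered
 * callee-saved registers, saved = ALIGN(5 * 4, 8) = 24; a 16-byte eBPF
 * stack gives locals = 16; with 24 bytes reserved for calls, the
 * prologue allocates stack = ALIGN(24 + 16 + 24, 8) = 64 and the eBPF
 * frame pointer ends up at SP + stack - saved = SP + 40.
 */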
1371
1372
/* Build program prologue to set up the stack and registers */
1373
void build_prologue(struct jit_context *ctx)
1374
{
1375
const u8 *r1 = bpf2mips32[BPF_REG_1];
1376
const u8 *fp = bpf2mips32[BPF_REG_FP];
1377
int stack, saved, locals, reserved;
1378
1379
/*
1380
* In the unlikely event that the TCC limit is raised to more
1381
* than 16 bits, it no longer fits in the 16-bit immediate field used
1382
* by the generated code (0xffff), so the build fails. It is better to
1383
* fail to compile than to degrade gracefully.
1384
*/
1385
BUILD_BUG_ON(MAX_TAIL_CALL_CNT > 0xffff);
1386
1387
/*
1388
* The first two instructions initialize TCC in the reserved (for us)
1389
* 16-byte area in the parent's stack frame. On a tail call, the
1390
* calling function jumps into the prologue after these instructions.
1391
*/
1392
emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
1393
emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP);
1394
1395
/*
1396
* Register eBPF R1 contains the 32-bit context pointer argument.
1397
* A 32-bit argument is always passed in MIPS register a0, regardless
1398
* of CPU endianness. Initialize R1 accordingly and zero-extend.
1399
*/
1400
#ifdef __BIG_ENDIAN
1401
emit(ctx, move, lo(r1), MIPS_R_A0);
1402
#endif
1403
1404
/* === Entry-point for tail calls === */
1405
1406
/* Zero-extend the 32-bit argument */
1407
emit(ctx, move, hi(r1), MIPS_R_ZERO);
1408
1409
/* If the eBPF frame pointer was accessed it must be saved */
1410
if (ctx->accessed & BIT(BPF_REG_FP))
1411
clobber_reg64(ctx, fp);
1412
1413
/* Compute the stack space needed for callee-saved registers */
1414
saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32);
1415
saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
1416
1417
/* Stack space used by eBPF program local data */
1418
locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
1419
1420
/*
1421
* If we are emitting function calls, reserve extra stack space for
1422
* caller-saved registers and function arguments passed on the stack.
1423
* The required space is computed automatically during resource
1424
* usage discovery (pass 1).
1425
*/
1426
reserved = ctx->stack_used;
1427
1428
/* Allocate the stack frame */
1429
stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
1430
emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack);
1431
1432
/* Store callee-saved registers on stack */
1433
push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
1434
1435
/* Initialize the eBPF frame pointer if accessed */
1436
if (ctx->accessed & BIT(BPF_REG_FP))
1437
emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved);
1438
1439
ctx->saved_size = saved;
1440
ctx->stack_size = stack;
1441
}
1442
1443
/* Build the program epilogue to restore the stack and registers */
1444
void build_epilogue(struct jit_context *ctx, int dest_reg)
1445
{
1446
/* Restore callee-saved registers from stack */
1447
pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
1448
ctx->stack_size - ctx->saved_size);
1449
/*
1450
* A 32-bit return value is always passed in MIPS register v0,
1451
* but on big-endian targets the low part of R0 is mapped to v1.
1452
*/
1453
#ifdef __BIG_ENDIAN
1454
emit(ctx, move, MIPS_R_V0, MIPS_R_V1);
1455
#endif
1456
1457
/* Jump to the return address and adjust the stack pointer */
1458
emit(ctx, jr, dest_reg);
1459
emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
1460
}
1461
1462
/* Build one eBPF instruction */
1463
int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
1464
{
1465
const u8 *dst = bpf2mips32[insn->dst_reg];
1466
const u8 *src = bpf2mips32[insn->src_reg];
1467
const u8 *res = bpf2mips32[BPF_REG_0];
1468
const u8 *tmp = bpf2mips32[JIT_REG_TMP];
1469
u8 code = insn->code;
1470
s16 off = insn->off;
1471
s32 imm = insn->imm;
1472
s32 val, rel;
1473
u8 alu, jmp;
1474
1475
switch (code) {
1476
/* ALU operations */
1477
/* dst = imm */
1478
case BPF_ALU | BPF_MOV | BPF_K:
1479
emit_mov_i(ctx, lo(dst), imm);
1480
emit_zext_ver(ctx, dst);
1481
break;
1482
/* dst = src */
1483
case BPF_ALU | BPF_MOV | BPF_X:
1484
if (imm == 1) {
1485
/* Special mov32 for zext */
1486
emit_mov_i(ctx, hi(dst), 0);
1487
} else {
1488
emit_mov_r(ctx, lo(dst), lo(src));
1489
emit_zext_ver(ctx, dst);
1490
}
1491
break;
1492
/* dst = -dst */
1493
case BPF_ALU | BPF_NEG:
1494
emit_alu_i(ctx, lo(dst), 0, BPF_NEG);
1495
emit_zext_ver(ctx, dst);
1496
break;
1497
/* dst = dst & imm */
1498
/* dst = dst | imm */
1499
/* dst = dst ^ imm */
1500
/* dst = dst << imm */
1501
/* dst = dst >> imm */
1502
/* dst = dst >> imm (arithmetic) */
1503
/* dst = dst + imm */
1504
/* dst = dst - imm */
1505
/* dst = dst * imm */
1506
/* dst = dst / imm */
1507
/* dst = dst % imm */
1508
case BPF_ALU | BPF_OR | BPF_K:
1509
case BPF_ALU | BPF_AND | BPF_K:
1510
case BPF_ALU | BPF_XOR | BPF_K:
1511
case BPF_ALU | BPF_LSH | BPF_K:
1512
case BPF_ALU | BPF_RSH | BPF_K:
1513
case BPF_ALU | BPF_ARSH | BPF_K:
1514
case BPF_ALU | BPF_ADD | BPF_K:
1515
case BPF_ALU | BPF_SUB | BPF_K:
1516
case BPF_ALU | BPF_MUL | BPF_K:
1517
case BPF_ALU | BPF_DIV | BPF_K:
1518
case BPF_ALU | BPF_MOD | BPF_K:
1519
if (!valid_alu_i(BPF_OP(code), imm)) {
1520
emit_mov_i(ctx, MIPS_R_T6, imm);
1521
emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code));
1522
} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
1523
emit_alu_i(ctx, lo(dst), val, alu);
1524
}
1525
emit_zext_ver(ctx, dst);
1526
break;
1527
/* dst = dst & src */
1528
/* dst = dst | src */
1529
/* dst = dst ^ src */
1530
/* dst = dst << src */
1531
/* dst = dst >> src */
1532
/* dst = dst >> src (arithmetic) */
1533
/* dst = dst + src */
1534
/* dst = dst - src */
1535
/* dst = dst * src */
1536
/* dst = dst / src */
1537
/* dst = dst % src */
1538
case BPF_ALU | BPF_AND | BPF_X:
1539
case BPF_ALU | BPF_OR | BPF_X:
1540
case BPF_ALU | BPF_XOR | BPF_X:
1541
case BPF_ALU | BPF_LSH | BPF_X:
1542
case BPF_ALU | BPF_RSH | BPF_X:
1543
case BPF_ALU | BPF_ARSH | BPF_X:
1544
case BPF_ALU | BPF_ADD | BPF_X:
1545
case BPF_ALU | BPF_SUB | BPF_X:
1546
case BPF_ALU | BPF_MUL | BPF_X:
1547
case BPF_ALU | BPF_DIV | BPF_X:
1548
case BPF_ALU | BPF_MOD | BPF_X:
1549
emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code));
1550
emit_zext_ver(ctx, dst);
1551
break;
1552
/* dst = imm (64-bit) */
1553
case BPF_ALU64 | BPF_MOV | BPF_K:
1554
emit_mov_se_i64(ctx, dst, imm);
1555
break;
1556
/* dst = src (64-bit) */
1557
case BPF_ALU64 | BPF_MOV | BPF_X:
1558
emit_mov_r(ctx, lo(dst), lo(src));
1559
emit_mov_r(ctx, hi(dst), hi(src));
1560
break;
1561
/* dst = -dst (64-bit) */
1562
case BPF_ALU64 | BPF_NEG:
1563
emit_neg_i64(ctx, dst);
1564
break;
1565
/* dst = dst & imm (64-bit) */
1566
case BPF_ALU64 | BPF_AND | BPF_K:
1567
emit_alu_i64(ctx, dst, imm, BPF_OP(code));
1568
break;
1569
/* dst = dst | imm (64-bit) */
1570
/* dst = dst ^ imm (64-bit) */
1571
/* dst = dst + imm (64-bit) */
1572
/* dst = dst - imm (64-bit) */
1573
case BPF_ALU64 | BPF_OR | BPF_K:
1574
case BPF_ALU64 | BPF_XOR | BPF_K:
1575
case BPF_ALU64 | BPF_ADD | BPF_K:
1576
case BPF_ALU64 | BPF_SUB | BPF_K:
1577
if (imm)
1578
emit_alu_i64(ctx, dst, imm, BPF_OP(code));
1579
break;
1580
/* dst = dst << imm (64-bit) */
1581
/* dst = dst >> imm (64-bit) */
1582
/* dst = dst >> imm (64-bit, arithmetic) */
1583
case BPF_ALU64 | BPF_LSH | BPF_K:
1584
case BPF_ALU64 | BPF_RSH | BPF_K:
1585
case BPF_ALU64 | BPF_ARSH | BPF_K:
1586
if (imm)
1587
emit_shift_i64(ctx, dst, imm, BPF_OP(code));
1588
break;
1589
/* dst = dst * imm (64-bit) */
1590
case BPF_ALU64 | BPF_MUL | BPF_K:
1591
emit_mul_i64(ctx, dst, imm);
1592
break;
1593
/* dst = dst / imm (64-bit) */
1594
/* dst = dst % imm (64-bit) */
1595
case BPF_ALU64 | BPF_DIV | BPF_K:
1596
case BPF_ALU64 | BPF_MOD | BPF_K:
1597
/*
1598
* Sign-extend the immediate value into a temporary register,
1599
* and then do the operation on this register.
1600
*/
1601
emit_mov_se_i64(ctx, tmp, imm);
1602
emit_divmod_r64(ctx, dst, tmp, BPF_OP(code));
1603
break;
1604
/* dst = dst & src (64-bit) */
1605
/* dst = dst | src (64-bit) */
1606
/* dst = dst ^ src (64-bit) */
1607
/* dst = dst + src (64-bit) */
1608
/* dst = dst - src (64-bit) */
1609
case BPF_ALU64 | BPF_AND | BPF_X:
1610
case BPF_ALU64 | BPF_OR | BPF_X:
1611
case BPF_ALU64 | BPF_XOR | BPF_X:
1612
case BPF_ALU64 | BPF_ADD | BPF_X:
1613
case BPF_ALU64 | BPF_SUB | BPF_X:
1614
emit_alu_r64(ctx, dst, src, BPF_OP(code));
1615
break;
1616
/* dst = dst << src (64-bit) */
1617
/* dst = dst >> src (64-bit) */
1618
/* dst = dst >> src (64-bit, arithmetic) */
1619
case BPF_ALU64 | BPF_LSH | BPF_X:
1620
case BPF_ALU64 | BPF_RSH | BPF_X:
1621
case BPF_ALU64 | BPF_ARSH | BPF_X:
1622
emit_shift_r64(ctx, dst, lo(src), BPF_OP(code));
1623
break;
1624
/* dst = dst * src (64-bit) */
1625
case BPF_ALU64 | BPF_MUL | BPF_X:
1626
emit_mul_r64(ctx, dst, src);
1627
break;
1628
/* dst = dst / src (64-bit) */
1629
/* dst = dst % src (64-bit) */
1630
case BPF_ALU64 | BPF_DIV | BPF_X:
1631
case BPF_ALU64 | BPF_MOD | BPF_X:
1632
emit_divmod_r64(ctx, dst, src, BPF_OP(code));
1633
break;
1634
/* dst = htole(dst) */
1635
/* dst = htobe(dst) */
1636
case BPF_ALU | BPF_END | BPF_FROM_LE:
1637
case BPF_ALU | BPF_END | BPF_FROM_BE:
1638
if (BPF_SRC(code) ==
1639
#ifdef __BIG_ENDIAN
1640
BPF_FROM_LE
1641
#else
1642
BPF_FROM_BE
1643
#endif
1644
)
1645
emit_bswap_r64(ctx, dst, imm);
1646
else
1647
emit_trunc_r64(ctx, dst, imm);
1648
break;
1649
/* dst = imm64 */
1650
case BPF_LD | BPF_IMM | BPF_DW:
1651
emit_mov_i(ctx, lo(dst), imm);
1652
emit_mov_i(ctx, hi(dst), insn[1].imm);
1653
return 1;
1654
/* LDX: dst = *(size *)(src + off) */
1655
case BPF_LDX | BPF_MEM | BPF_W:
1656
case BPF_LDX | BPF_MEM | BPF_H:
1657
case BPF_LDX | BPF_MEM | BPF_B:
1658
case BPF_LDX | BPF_MEM | BPF_DW:
1659
emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code));
1660
break;
1661
/* ST: *(size *)(dst + off) = imm */
1662
case BPF_ST | BPF_MEM | BPF_W:
1663
case BPF_ST | BPF_MEM | BPF_H:
1664
case BPF_ST | BPF_MEM | BPF_B:
1665
case BPF_ST | BPF_MEM | BPF_DW:
1666
switch (BPF_SIZE(code)) {
1667
case BPF_DW:
1668
/* Sign-extend immediate value into temporary reg */
1669
emit_mov_se_i64(ctx, tmp, imm);
1670
break;
1671
case BPF_W:
1672
case BPF_H:
1673
case BPF_B:
1674
emit_mov_i(ctx, lo(tmp), imm);
1675
break;
1676
}
1677
emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code));
1678
break;
1679
/* STX: *(size *)(dst + off) = src */
1680
case BPF_STX | BPF_MEM | BPF_W:
1681
case BPF_STX | BPF_MEM | BPF_H:
1682
case BPF_STX | BPF_MEM | BPF_B:
1683
case BPF_STX | BPF_MEM | BPF_DW:
1684
emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code));
1685
break;
1686
/* Speculation barrier */
1687
case BPF_ST | BPF_NOSPEC:
1688
break;
1689
/* Atomics */
1690
case BPF_STX | BPF_ATOMIC | BPF_W:
1691
switch (imm) {
1692
case BPF_ADD:
1693
case BPF_ADD | BPF_FETCH:
1694
case BPF_AND:
1695
case BPF_AND | BPF_FETCH:
1696
case BPF_OR:
1697
case BPF_OR | BPF_FETCH:
1698
case BPF_XOR:
1699
case BPF_XOR | BPF_FETCH:
1700
case BPF_XCHG:
1701
if (cpu_has_llsc)
1702
emit_atomic_r(ctx, lo(dst), lo(src), off, imm);
1703
else /* Non-ll/sc fallback */
1704
emit_atomic_r32(ctx, lo(dst), lo(src),
1705
off, imm);
1706
if (imm & BPF_FETCH)
1707
emit_zext_ver(ctx, src);
1708
break;
1709
case BPF_CMPXCHG:
1710
if (cpu_has_llsc)
1711
emit_cmpxchg_r(ctx, lo(dst), lo(src),
1712
lo(res), off);
1713
else /* Non-ll/sc fallback */
1714
emit_cmpxchg_r32(ctx, lo(dst), lo(src), off);
1715
/* Result zero-extension inserted by verifier */
1716
break;
1717
default:
1718
goto notyet;
1719
}
1720
break;
1721
/* Atomics (64-bit) */
1722
case BPF_STX | BPF_ATOMIC | BPF_DW:
1723
switch (imm) {
1724
case BPF_ADD:
1725
case BPF_ADD | BPF_FETCH:
1726
case BPF_AND:
1727
case BPF_AND | BPF_FETCH:
1728
case BPF_OR:
1729
case BPF_OR | BPF_FETCH:
1730
case BPF_XOR:
1731
case BPF_XOR | BPF_FETCH:
1732
case BPF_XCHG:
1733
emit_atomic_r64(ctx, lo(dst), src, off, imm);
1734
break;
1735
case BPF_CMPXCHG:
1736
emit_cmpxchg_r64(ctx, lo(dst), src, off);
1737
break;
1738
default:
1739
goto notyet;
1740
}
1741
break;
1742
/* PC += off if dst == src */
1743
/* PC += off if dst != src */
1744
/* PC += off if dst & src */
1745
/* PC += off if dst > src */
1746
/* PC += off if dst >= src */
1747
/* PC += off if dst < src */
1748
/* PC += off if dst <= src */
1749
/* PC += off if dst > src (signed) */
1750
/* PC += off if dst >= src (signed) */
1751
/* PC += off if dst < src (signed) */
1752
/* PC += off if dst <= src (signed) */
1753
case BPF_JMP32 | BPF_JEQ | BPF_X:
1754
case BPF_JMP32 | BPF_JNE | BPF_X:
1755
case BPF_JMP32 | BPF_JSET | BPF_X:
1756
case BPF_JMP32 | BPF_JGT | BPF_X:
1757
case BPF_JMP32 | BPF_JGE | BPF_X:
1758
case BPF_JMP32 | BPF_JLT | BPF_X:
1759
case BPF_JMP32 | BPF_JLE | BPF_X:
1760
case BPF_JMP32 | BPF_JSGT | BPF_X:
1761
case BPF_JMP32 | BPF_JSGE | BPF_X:
1762
case BPF_JMP32 | BPF_JSLT | BPF_X:
1763
case BPF_JMP32 | BPF_JSLE | BPF_X:
1764
if (off == 0)
1765
break;
1766
setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
1767
emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp);
1768
if (finish_jmp(ctx, jmp, off) < 0)
1769
goto toofar;
1770
break;
1771
/* PC += off if dst == imm */
1772
/* PC += off if dst != imm */
1773
/* PC += off if dst & imm */
1774
/* PC += off if dst > imm */
1775
/* PC += off if dst >= imm */
1776
/* PC += off if dst < imm */
1777
/* PC += off if dst <= imm */
1778
/* PC += off if dst > imm (signed) */
1779
/* PC += off if dst >= imm (signed) */
1780
/* PC += off if dst < imm (signed) */
1781
/* PC += off if dst <= imm (signed) */
1782
case BPF_JMP32 | BPF_JEQ | BPF_K:
1783
case BPF_JMP32 | BPF_JNE | BPF_K:
1784
case BPF_JMP32 | BPF_JSET | BPF_K:
1785
case BPF_JMP32 | BPF_JGT | BPF_K:
1786
case BPF_JMP32 | BPF_JGE | BPF_K:
1787
case BPF_JMP32 | BPF_JLT | BPF_K:
1788
case BPF_JMP32 | BPF_JLE | BPF_K:
1789
case BPF_JMP32 | BPF_JSGT | BPF_K:
1790
case BPF_JMP32 | BPF_JSGE | BPF_K:
1791
case BPF_JMP32 | BPF_JSLT | BPF_K:
1792
case BPF_JMP32 | BPF_JSLE | BPF_K:
1793
if (off == 0)
1794
break;
1795
setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
1796
if (valid_jmp_i(jmp, imm)) {
1797
emit_jmp_i(ctx, lo(dst), imm, rel, jmp);
1798
} else {
1799
/* Move large immediate to register */
1800
emit_mov_i(ctx, MIPS_R_T6, imm);
1801
emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp);
1802
}
1803
if (finish_jmp(ctx, jmp, off) < 0)
1804
goto toofar;
1805
break;
1806
/* PC += off if dst == src */
1807
/* PC += off if dst != src */
1808
/* PC += off if dst & src */
1809
/* PC += off if dst > src */
1810
/* PC += off if dst >= src */
1811
/* PC += off if dst < src */
1812
/* PC += off if dst <= src */
1813
/* PC += off if dst > src (signed) */
1814
/* PC += off if dst >= src (signed) */
1815
/* PC += off if dst < src (signed) */
1816
/* PC += off if dst <= src (signed) */
1817
case BPF_JMP | BPF_JEQ | BPF_X:
1818
case BPF_JMP | BPF_JNE | BPF_X:
1819
case BPF_JMP | BPF_JSET | BPF_X:
1820
case BPF_JMP | BPF_JGT | BPF_X:
1821
case BPF_JMP | BPF_JGE | BPF_X:
1822
case BPF_JMP | BPF_JLT | BPF_X:
1823
case BPF_JMP | BPF_JLE | BPF_X:
1824
case BPF_JMP | BPF_JSGT | BPF_X:
1825
case BPF_JMP | BPF_JSGE | BPF_X:
1826
case BPF_JMP | BPF_JSLT | BPF_X:
1827
case BPF_JMP | BPF_JSLE | BPF_X:
1828
if (off == 0)
1829
break;
1830
setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
1831
emit_jmp_r64(ctx, dst, src, rel, jmp);
1832
if (finish_jmp(ctx, jmp, off) < 0)
1833
goto toofar;
1834
break;
1835
/* PC += off if dst == imm */
1836
/* PC += off if dst != imm */
1837
/* PC += off if dst & imm */
1838
/* PC += off if dst > imm */
1839
/* PC += off if dst >= imm */
1840
/* PC += off if dst < imm */
1841
/* PC += off if dst <= imm */
1842
/* PC += off if dst > imm (signed) */
1843
/* PC += off if dst >= imm (signed) */
1844
/* PC += off if dst < imm (signed) */
1845
/* PC += off if dst <= imm (signed) */
1846
case BPF_JMP | BPF_JEQ | BPF_K:
1847
case BPF_JMP | BPF_JNE | BPF_K:
1848
case BPF_JMP | BPF_JSET | BPF_K:
1849
case BPF_JMP | BPF_JGT | BPF_K:
1850
case BPF_JMP | BPF_JGE | BPF_K:
1851
case BPF_JMP | BPF_JLT | BPF_K:
1852
case BPF_JMP | BPF_JLE | BPF_K:
1853
case BPF_JMP | BPF_JSGT | BPF_K:
1854
case BPF_JMP | BPF_JSGE | BPF_K:
1855
case BPF_JMP | BPF_JSLT | BPF_K:
1856
case BPF_JMP | BPF_JSLE | BPF_K:
1857
if (off == 0)
1858
break;
1859
setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
1860
emit_jmp_i64(ctx, dst, imm, rel, jmp);
1861
if (finish_jmp(ctx, jmp, off) < 0)
1862
goto toofar;
1863
break;
1864
/* PC += off */
1865
case BPF_JMP | BPF_JA:
1866
if (off == 0)
1867
break;
1868
if (emit_ja(ctx, off) < 0)
1869
goto toofar;
1870
break;
1871
/* Tail call */
1872
case BPF_JMP | BPF_TAIL_CALL:
1873
if (emit_tail_call(ctx) < 0)
1874
goto invalid;
1875
break;
1876
/* Function call */
1877
case BPF_JMP | BPF_CALL:
1878
if (emit_call(ctx, insn) < 0)
1879
goto invalid;
1880
break;
1881
/* Function return */
1882
case BPF_JMP | BPF_EXIT:
1883
/*
1884
* Optimization: when last instruction is EXIT
1885
* simply continue to epilogue.
1886
*/
1887
if (ctx->bpf_index == ctx->program->len - 1)
1888
break;
1889
if (emit_exit(ctx) < 0)
1890
goto toofar;
1891
break;
1892
1893
default:
1894
invalid:
1895
pr_err_once("unknown opcode %02x\n", code);
1896
return -EINVAL;
1897
notyet:
1898
pr_info_once("*** NOT YET: opcode %02x ***\n", code);
1899
return -EFAULT;
1900
toofar:
1901
pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
1902
ctx->bpf_index, code);
1903
return -E2BIG;
1904
}
1905
return 0;
1906
}
1907
1908