GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/isa/aarch64/inst/emit.rs
1
//! AArch64 ISA: binary code emission.
2
3
use cranelift_control::ControlPlane;
4
5
use crate::ir::{self, types::*};
6
use crate::isa::aarch64::inst::*;
7
use crate::trace;
8
9
/// Memory addressing mode finalization: convert "special" modes (e.g.,
10
/// generic arbitrary stack offset) into real addressing modes, possibly by
11
/// emitting some helper instructions that come immediately before the use
12
/// of this amode.
13
pub fn mem_finalize(
14
sink: Option<&mut MachBuffer<Inst>>,
15
mem: &AMode,
16
access_ty: Type,
17
state: &EmitState,
18
) -> (SmallVec<[Inst; 4]>, AMode) {
19
match mem {
20
&AMode::RegOffset { off, .. }
21
| &AMode::SPOffset { off }
22
| &AMode::FPOffset { off }
23
| &AMode::IncomingArg { off }
24
| &AMode::SlotOffset { off } => {
25
let basereg = match mem {
26
&AMode::RegOffset { rn, .. } => rn,
27
&AMode::SPOffset { .. }
28
| &AMode::SlotOffset { .. }
29
| &AMode::IncomingArg { .. } => stack_reg(),
30
&AMode::FPOffset { .. } => fp_reg(),
31
_ => unreachable!(),
32
};
33
let off = match mem {
34
&AMode::IncomingArg { .. } => {
35
let frame_layout = state.frame_layout();
36
i64::from(
37
frame_layout.setup_area_size
38
+ frame_layout.tail_args_size
39
+ frame_layout.clobber_size
40
+ frame_layout.fixed_frame_storage_size
41
+ frame_layout.outgoing_args_size,
42
) - off
43
}
44
&AMode::SlotOffset { .. } => {
45
let adj = i64::from(state.frame_layout().outgoing_args_size);
46
trace!(
47
"mem_finalize: slot offset {} + adj {} -> {}",
48
off,
49
adj,
50
off + adj
51
);
52
off + adj
53
}
54
_ => off,
55
};
56
57
if let Some(simm9) = SImm9::maybe_from_i64(off) {
58
let mem = AMode::Unscaled { rn: basereg, simm9 };
59
(smallvec![], mem)
60
} else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, access_ty) {
61
let mem = AMode::UnsignedOffset {
62
rn: basereg,
63
uimm12,
64
};
65
(smallvec![], mem)
66
} else {
67
let tmp = writable_spilltmp_reg();
68
(
69
Inst::load_constant(tmp, off as u64),
70
AMode::RegExtended {
71
rn: basereg,
72
rm: tmp.to_reg(),
73
extendop: ExtendOp::SXTX,
74
},
75
)
76
}
77
}
78
79
AMode::Const { addr } => {
80
let sink = match sink {
81
Some(sink) => sink,
82
None => return (smallvec![], mem.clone()),
83
};
84
let label = sink.get_label_for_constant(*addr);
85
let label = MemLabel::Mach(label);
86
(smallvec![], AMode::Label { label })
87
}
88
89
_ => (smallvec![], mem.clone()),
90
}
91
}
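// For example, an `FPOffset` whose offset fits in SImm9 becomes a single
// `Unscaled { rn: fp, simm9 }` amode with no helper instructions, while an
// offset too large for both SImm9 and UImm12 is materialized into the spill
// temporary register and rewritten as `RegExtended { rn: fp, rm: tmp, SXTX }`,
// with the `load_constant` sequence returned as the helper instructions.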
92
93
//=============================================================================
94
// Instructions and subcomponents: emission
95
96
pub(crate) fn machreg_to_gpr(m: Reg) -> u32 {
97
assert_eq!(m.class(), RegClass::Int);
98
u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
99
}
100
101
pub(crate) fn machreg_to_vec(m: Reg) -> u32 {
102
assert_eq!(m.class(), RegClass::Float);
103
u32::from(m.to_real_reg().unwrap().hw_enc())
104
}
105
106
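/// Like `machreg_to_gpr`/`machreg_to_vec`, but accepts a register of either
/// class; used for the Rt field of loads and stores, which may name a GPR or
/// a vector register depending on the opcode.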
fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
107
u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
108
}
109
110
/// Encode a 3-register arithmetic instruction.
111
pub fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
112
(bits_31_21 << 21)
113
| (bits_15_10 << 10)
114
| machreg_to_gpr(rd.to_reg())
115
| (machreg_to_gpr(rn) << 5)
116
| (machreg_to_gpr(rm) << 16)
117
}
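// For example, `add x0, x1, x2` (64-bit, no shift) uses
// bits_31_21 = 0b10001011_000 and bits_15_10 = 0, giving 0x8B020020.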
118
119
fn enc_arith_rr_imm12(
120
bits_31_24: u32,
121
immshift: u32,
122
imm12: u32,
123
rn: Reg,
124
rd: Writable<Reg>,
125
) -> u32 {
126
(bits_31_24 << 24)
127
| (immshift << 22)
128
| (imm12 << 10)
129
| (machreg_to_gpr(rn) << 5)
130
| machreg_to_gpr(rd.to_reg())
131
}
132
133
fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
134
(bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
135
}
136
137
fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
138
(top11 << 21)
139
| (machreg_to_gpr(rm) << 16)
140
| (bit15 << 15)
141
| (machreg_to_gpr(ra) << 10)
142
| (machreg_to_gpr(rn) << 5)
143
| machreg_to_gpr(rd.to_reg())
144
}
145
146
fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
147
assert!(off_26_0 < (1 << 26));
148
(op_31_26 << 26) | off_26_0
149
}
150
151
fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
152
assert!(off_18_0 < (1 << 19));
153
(op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
154
}
155
156
fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
157
assert!(off_18_0 < (1 << 19));
158
assert!(cond < (1 << 4));
159
(op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
160
}
161
162
/// Set the size bit of an instruction.
163
fn enc_op_size(op: u32, size: OperandSize) -> u32 {
164
(op & !(1 << 31)) | (size.sf_bit() << 31)
165
}
166
167
fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
168
match kind {
169
CondBrKind::Zero(reg, size) => enc_op_size(
170
enc_cmpbr(0b0_011010_0, taken.as_offset19_or_zero(), reg),
171
size,
172
),
173
CondBrKind::NotZero(reg, size) => enc_op_size(
174
enc_cmpbr(0b0_011010_1, taken.as_offset19_or_zero(), reg),
175
size,
176
),
177
CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
178
}
179
}
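// The 19-bit offset is in units of 4 bytes; when the target is a label it is
// emitted as zero here and patched later via a `LabelUse::Branch19` fixup.
// For example, `b.eq` with a zero offset encodes as 0x54000000.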
180
181
fn enc_test_bit_and_branch(
182
kind: TestBitAndBranchKind,
183
taken: BranchTarget,
184
reg: Reg,
185
bit: u8,
186
) -> u32 {
187
assert!(bit < 64);
188
let op_31 = u32::from(bit >> 5);
189
let op_23_19 = u32::from(bit & 0b11111);
190
let op_30_24 = 0b0110110
191
| match kind {
192
TestBitAndBranchKind::Z => 0,
193
TestBitAndBranchKind::NZ => 1,
194
};
195
(op_31 << 31)
196
| (op_30_24 << 24)
197
| (op_23_19 << 19)
198
| (taken.as_offset14_or_zero() << 5)
199
| machreg_to_gpr(reg)
200
}
201
202
/// Encode a move-wide instruction.
203
pub fn enc_move_wide(
204
op: MoveWideOp,
205
rd: Writable<Reg>,
206
imm: MoveWideConst,
207
size: OperandSize,
208
) -> u32 {
209
assert!(imm.shift <= 0b11);
210
let op = match op {
211
MoveWideOp::MovN => 0b00,
212
MoveWideOp::MovZ => 0b10,
213
};
214
0x12800000
215
| size.sf_bit() << 31
216
| op << 29
217
| u32::from(imm.shift) << 21
218
| u32::from(imm.bits) << 5
219
| machreg_to_gpr(rd.to_reg())
220
}
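// For example, `movz x0, #1` is MovZ with shift = 0, giving 0xD2800020;
// MovN with the same operands would instead yield 0x92800020.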
221
222
/// Encode a move-wide-with-keep (MOVK) immediate instruction.
223
pub fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
224
assert!(imm.shift <= 0b11);
225
0x72800000
226
| size.sf_bit() << 31
227
| u32::from(imm.shift) << 21
228
| u32::from(imm.bits) << 5
229
| machreg_to_gpr(rd.to_reg())
230
}
231
232
fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
233
(op_31_22 << 22)
234
| (simm7.bits() << 15)
235
| (machreg_to_gpr(rt2) << 10)
236
| (machreg_to_gpr(rn) << 5)
237
| machreg_to_gpr(rt)
238
}
239
240
fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
241
(op_31_22 << 22)
242
| (simm9.bits() << 12)
243
| (op_11_10 << 10)
244
| (machreg_to_gpr(rn) << 5)
245
| machreg_to_gpr_or_vec(rd)
246
}
247
248
fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
249
(op_31_22 << 22)
250
| (0b1 << 24)
251
| (uimm12.bits() << 10)
252
| (machreg_to_gpr(rn) << 5)
253
| machreg_to_gpr_or_vec(rd)
254
}
255
256
fn enc_ldst_reg(
257
op_31_22: u32,
258
rn: Reg,
259
rm: Reg,
260
s_bit: bool,
261
extendop: Option<ExtendOp>,
262
rd: Reg,
263
) -> u32 {
264
let s_bit = if s_bit { 1 } else { 0 };
265
let extend_bits = match extendop {
266
Some(ExtendOp::UXTW) => 0b010,
267
Some(ExtendOp::SXTW) => 0b110,
268
Some(ExtendOp::SXTX) => 0b111,
269
None => 0b011, // LSL
270
_ => panic!("bad extend mode for ld/st AMode"),
271
};
272
(op_31_22 << 22)
273
| (1 << 21)
274
| (machreg_to_gpr(rm) << 16)
275
| (extend_bits << 13)
276
| (s_bit << 12)
277
| (0b10 << 10)
278
| (machreg_to_gpr(rn) << 5)
279
| machreg_to_gpr_or_vec(rd)
280
}
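// For example, `ldr x0, [x1, x2, lsl #3]` is the ULoad64 opcode with
// s_bit = true and no extend op (LSL), which encodes to 0xF8627820.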
281
282
pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
283
(op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
284
}
285
286
fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
287
debug_assert_eq!(q & 0b1, q);
288
debug_assert_eq!(size & 0b11, size);
289
0b0_0_0011010_10_00000_110_0_00_00000_00000
290
| q << 30
291
| size << 10
292
| machreg_to_gpr(rn) << 5
293
| machreg_to_vec(rt.to_reg())
294
}
295
296
fn enc_ldst_vec_pair(
297
opc: u32,
298
amode: u32,
299
is_load: bool,
300
simm7: SImm7Scaled,
301
rn: Reg,
302
rt: Reg,
303
rt2: Reg,
304
) -> u32 {
305
debug_assert_eq!(opc & 0b11, opc);
306
debug_assert_eq!(amode & 0b11, amode);
307
308
0b00_10110_00_0_0000000_00000_00000_00000
309
| opc << 30
310
| amode << 23
311
| (is_load as u32) << 22
312
| simm7.bits() << 15
313
| machreg_to_vec(rt2) << 10
314
| machreg_to_gpr(rn) << 5
315
| machreg_to_vec(rt)
316
}
317
318
fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
319
(top11 << 21)
320
| (machreg_to_vec(rm) << 16)
321
| (bit15_10 << 10)
322
| (machreg_to_vec(rn) << 5)
323
| machreg_to_vec(rd.to_reg())
324
}
325
326
fn enc_vec_rrr_long(
327
q: u32,
328
u: u32,
329
size: u32,
330
bit14: u32,
331
rm: Reg,
332
rn: Reg,
333
rd: Writable<Reg>,
334
) -> u32 {
335
debug_assert_eq!(q & 0b1, q);
336
debug_assert_eq!(u & 0b1, u);
337
debug_assert_eq!(size & 0b11, size);
338
debug_assert_eq!(bit14 & 0b1, bit14);
339
340
0b0_0_0_01110_00_1_00000_100000_00000_00000
341
| q << 30
342
| u << 29
343
| size << 22
344
| bit14 << 14
345
| (machreg_to_vec(rm) << 16)
346
| (machreg_to_vec(rn) << 5)
347
| machreg_to_vec(rd.to_reg())
348
}
349
350
fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
351
(0b01011010110 << 21)
352
| size << 31
353
| opcode2 << 16
354
| opcode1 << 10
355
| machreg_to_gpr(rn) << 5
356
| machreg_to_gpr(rd.to_reg())
357
}
358
359
pub(crate) fn enc_br(rn: Reg) -> u32 {
360
0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
361
}
362
363
pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32 {
364
let off = u32::try_from(off).unwrap();
365
let immlo = off & 3;
366
let immhi = (off >> 2) & ((1 << 19) - 1);
367
opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
368
}
369
370
pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
371
let opcode = 0b00010000 << 24;
372
enc_adr_inst(opcode, off, rd)
373
}
374
375
pub(crate) fn enc_adrp(off: i32, rd: Writable<Reg>) -> u32 {
376
let opcode = 0b10010000 << 24;
377
enc_adr_inst(opcode, off, rd)
378
}
379
380
fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
381
debug_assert_eq!(op & 0b1, op);
382
debug_assert_eq!(o2 & 0b1, o2);
383
0b100_11010100_00000_0000_00_00000_00000
384
| (op << 30)
385
| (machreg_to_gpr(rm) << 16)
386
| (cond.bits() << 12)
387
| (o2 << 10)
388
| (machreg_to_gpr(rn) << 5)
389
| machreg_to_gpr(rd.to_reg())
390
}
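// For example, `csel x0, x1, x2, eq` (op = 0, o2 = 0) encodes to 0x9A820020;
// CSNEG uses op = 1, o2 = 1 on the same base encoding.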
391
392
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
393
0b000_11110_00_1_00000_0000_11_00000_00000
394
| (size.ftype() << 22)
395
| (machreg_to_vec(rm) << 16)
396
| (machreg_to_vec(rn) << 5)
397
| machreg_to_vec(rd.to_reg())
398
| (cond.bits() << 12)
399
}
400
401
fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 {
402
0b0_1_1_11010010_00000_0000_00_00000_0_0000
403
| size.sf_bit() << 31
404
| machreg_to_gpr(rm) << 16
405
| cond.bits() << 12
406
| machreg_to_gpr(rn) << 5
407
| nzcv.bits()
408
}
409
410
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
411
0b0_1_1_11010010_00000_0000_10_00000_0_0000
412
| size.sf_bit() << 31
413
| imm.bits() << 16
414
| cond.bits() << 12
415
| machreg_to_gpr(rn) << 5
416
| nzcv.bits()
417
}
418
419
fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
420
match size {
421
OperandSize::Size64 => {
422
debug_assert!(immr <= 63);
423
debug_assert!(imms <= 63);
424
}
425
OperandSize::Size32 => {
426
debug_assert!(immr <= 31);
427
debug_assert!(imms <= 31);
428
}
429
}
430
debug_assert_eq!(opc & 0b11, opc);
431
let n_bit = size.sf_bit();
432
0b0_00_100110_0_000000_000000_00000_00000
433
| size.sf_bit() << 31
434
| u32::from(opc) << 29
435
| n_bit << 22
436
| u32::from(immr) << 16
437
| u32::from(imms) << 10
438
| machreg_to_gpr(rn) << 5
439
| machreg_to_gpr(rd.to_reg())
440
}
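// For example, `lsr x0, x1, #4` is UBFM (opc = 0b10) with immr = 4 and
// imms = 63 in the 64-bit form, encoding to 0xD344FC20.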
441
442
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
443
0b00001110_101_00000_00011_1_00000_00000
444
| ((is_16b as u32) << 30)
445
| machreg_to_vec(rd.to_reg())
446
| (machreg_to_vec(rn) << 16)
447
| (machreg_to_vec(rn) << 5)
448
}
449
450
fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
451
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
452
}
453
454
fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
455
(top22 << 10)
456
| (machreg_to_vec(rm) << 16)
457
| (machreg_to_vec(rn) << 5)
458
| machreg_to_vec(rd.to_reg())
459
}
460
461
fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
462
(top17 << 15)
463
| (machreg_to_vec(rm) << 16)
464
| (machreg_to_vec(ra) << 10)
465
| (machreg_to_vec(rn) << 5)
466
| machreg_to_vec(rd.to_reg())
467
}
468
469
fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
470
0b000_11110_00_1_00000_00_1000_00000_00000
471
| (size.ftype() << 22)
472
| (machreg_to_vec(rm) << 16)
473
| (machreg_to_vec(rn) << 5)
474
}
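// For example, `fcmp s0, s1` (single precision, ftype = 0b00) encodes to
// 0x1E212000.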
475
476
fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
477
(top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
478
}
479
480
fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
481
(top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
482
}
483
484
fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
485
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
486
}
487
488
fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
489
debug_assert_eq!(qu & 0b11, qu);
490
debug_assert_eq!(size & 0b11, size);
491
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
492
let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
493
bits | qu << 29
494
| size << 22
495
| bits_12_16 << 12
496
| machreg_to_vec(rn) << 5
497
| machreg_to_vec(rd.to_reg())
498
}
499
500
fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
501
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
502
503
0b010_11110_11_11000_11011_10_00000_00000
504
| bits_12_16 << 12
505
| machreg_to_vec(rn) << 5
506
| machreg_to_vec(rd.to_reg())
507
}
508
509
fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
510
debug_assert_eq!(u & 0b1, u);
511
debug_assert_eq!(enc_size & 0b1, enc_size);
512
513
0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
514
| u << 29
515
| enc_size << 22
516
| machreg_to_vec(rn) << 5
517
| machreg_to_vec(rd.to_reg())
518
}
519
520
fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
521
debug_assert_eq!(q & 0b1, q);
522
debug_assert_eq!(u & 0b1, u);
523
debug_assert_eq!(size & 0b11, size);
524
debug_assert_eq!(opcode & 0b11111, opcode);
525
0b0_0_0_01110_00_11000_0_0000_10_00000_00000
526
| q << 30
527
| u << 29
528
| size << 22
529
| opcode << 12
530
| machreg_to_vec(rn) << 5
531
| machreg_to_vec(rd.to_reg())
532
}
533
534
fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
535
debug_assert_eq!(len & 0b11, len);
536
0b0_1_001110_000_00000_0_00_0_00_00000_00000
537
| (machreg_to_vec(rm) << 16)
538
| len << 13
539
| (is_extension as u32) << 12
540
| (machreg_to_vec(rn) << 5)
541
| machreg_to_vec(rd.to_reg())
542
}
543
544
fn enc_dmb_ish() -> u32 {
545
0xD5033BBF
546
}
547
548
fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
549
assert!(machreg_to_gpr(rt.to_reg()) != 31);
550
let sz = match ty {
551
I64 => 0b11,
552
I32 => 0b10,
553
I16 => 0b01,
554
I8 => 0b00,
555
_ => unreachable!(),
556
};
557
let bit15 = match op {
558
AtomicRMWOp::Swp => 0b1,
559
_ => 0b0,
560
};
561
let op = match op {
562
AtomicRMWOp::Add => 0b000,
563
AtomicRMWOp::Clr => 0b001,
564
AtomicRMWOp::Eor => 0b010,
565
AtomicRMWOp::Set => 0b011,
566
AtomicRMWOp::Smax => 0b100,
567
AtomicRMWOp::Smin => 0b101,
568
AtomicRMWOp::Umax => 0b110,
569
AtomicRMWOp::Umin => 0b111,
570
AtomicRMWOp::Swp => 0b000,
571
};
572
0b00_111_000_111_00000_0_000_00_00000_00000
573
| (sz << 30)
574
| (machreg_to_gpr(rs) << 16)
575
| bit15 << 15
576
| (op << 12)
577
| (machreg_to_gpr(rn) << 5)
578
| machreg_to_gpr(rt.to_reg())
579
}
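// These are the LSE acquire-release forms (A = R = 1). For example,
// `ldaddal x2, x0, [x1]` (64-bit Add) encodes to 0xF8E20020, and Swp selects
// the SWPAL encoding via bit 15.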
580
581
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
582
let sz = match ty {
583
I64 => 0b11,
584
I32 => 0b10,
585
I16 => 0b01,
586
I8 => 0b00,
587
_ => unreachable!(),
588
};
589
0b00_001000_1_1_0_11111_1_11111_00000_00000
590
| (sz << 30)
591
| (machreg_to_gpr(rn) << 5)
592
| machreg_to_gpr(rt.to_reg())
593
}
594
595
fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
596
let sz = match ty {
597
I64 => 0b11,
598
I32 => 0b10,
599
I16 => 0b01,
600
I8 => 0b00,
601
_ => unreachable!(),
602
};
603
0b00_001000_100_11111_1_11111_00000_00000
604
| (sz << 30)
605
| (machreg_to_gpr(rn) << 5)
606
| machreg_to_gpr(rt)
607
}
608
609
fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
610
let sz = match ty {
611
I64 => 0b11,
612
I32 => 0b10,
613
I16 => 0b01,
614
I8 => 0b00,
615
_ => unreachable!(),
616
};
617
0b00_001000_0_1_0_11111_1_11111_00000_00000
618
| (sz << 30)
619
| (machreg_to_gpr(rn) << 5)
620
| machreg_to_gpr(rt.to_reg())
621
}
622
623
fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
624
let sz = match ty {
625
I64 => 0b11,
626
I32 => 0b10,
627
I16 => 0b01,
628
I8 => 0b00,
629
_ => unreachable!(),
630
};
631
0b00_001000_000_00000_1_11111_00000_00000
632
| (sz << 30)
633
| (machreg_to_gpr(rs.to_reg()) << 16)
634
| (machreg_to_gpr(rn) << 5)
635
| machreg_to_gpr(rt)
636
}
637
638
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
639
debug_assert_eq!(size & 0b11, size);
640
641
0b00_0010001_1_1_00000_1_11111_00000_00000
642
| size << 30
643
| machreg_to_gpr(rs.to_reg()) << 16
644
| machreg_to_gpr(rn) << 5
645
| machreg_to_gpr(rt)
646
}
647
648
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
649
let abc = (imm >> 5) as u32;
650
let defgh = (imm & 0b11111) as u32;
651
652
debug_assert_eq!(cmode & 0b1111, cmode);
653
debug_assert_eq!(q_op & 0b11, q_op);
654
655
0b0_0_0_0111100000_000_0000_01_00000_00000
656
| (q_op << 29)
657
| (abc << 16)
658
| (cmode << 12)
659
| (defgh << 5)
660
| machreg_to_vec(rd.to_reg())
661
}
662
663
/// State carried between emissions of a sequence of instructions.
664
#[derive(Default, Clone, Debug)]
665
pub struct EmitState {
666
/// The user stack map for the upcoming instruction, as provided to
667
/// `pre_safepoint()`.
668
user_stack_map: Option<ir::UserStackMap>,
669
670
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
671
/// optimized away at compile time. See [cranelift_control].
672
ctrl_plane: ControlPlane,
673
674
frame_layout: FrameLayout,
675
}
676
677
impl MachInstEmitState<Inst> for EmitState {
678
fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {
679
EmitState {
680
user_stack_map: None,
681
ctrl_plane,
682
frame_layout: abi.frame_layout().clone(),
683
}
684
}
685
686
fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
687
self.user_stack_map = user_stack_map;
688
}
689
690
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
691
&mut self.ctrl_plane
692
}
693
694
fn take_ctrl_plane(self) -> ControlPlane {
695
self.ctrl_plane
696
}
697
698
fn frame_layout(&self) -> &FrameLayout {
699
&self.frame_layout
700
}
701
}
702
703
impl EmitState {
704
fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
705
self.user_stack_map.take()
706
}
707
708
fn clear_post_insn(&mut self) {
709
self.user_stack_map = None;
710
}
711
}
712
713
/// Constant state used during function compilation.
714
pub struct EmitInfo(settings::Flags);
715
716
impl EmitInfo {
717
/// Create a constant state for emission of instructions.
718
pub fn new(flags: settings::Flags) -> Self {
719
Self(flags)
720
}
721
}
722
723
impl MachInstEmit for Inst {
724
type State = EmitState;
725
type Info = EmitInfo;
726
727
fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
728
// N.B.: we *must* not exceed the "worst-case size" used to compute
729
// where to insert islands, except when islands are explicitly triggered
730
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
731
// to allow disabling the check for `JTSequence`, which is always
732
// emitted following an `EmitIsland`.
733
let mut start_off = sink.cur_offset();
734
735
match self {
736
&Inst::AluRRR {
737
alu_op,
738
size,
739
rd,
740
rn,
741
rm,
742
} => {
743
debug_assert!(match alu_op {
744
ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64,
745
_ => true,
746
});
747
let top11 = match alu_op {
748
ALUOp::Add => 0b00001011_000,
749
ALUOp::Adc => 0b00011010_000,
750
ALUOp::AdcS => 0b00111010_000,
751
ALUOp::Sub => 0b01001011_000,
752
ALUOp::Sbc => 0b01011010_000,
753
ALUOp::SbcS => 0b01111010_000,
754
ALUOp::Orr => 0b00101010_000,
755
ALUOp::And => 0b00001010_000,
756
ALUOp::AndS => 0b01101010_000,
757
ALUOp::Eor => 0b01001010_000,
758
ALUOp::OrrNot => 0b00101010_001,
759
ALUOp::AndNot => 0b00001010_001,
760
ALUOp::EorNot => 0b01001010_001,
761
ALUOp::AddS => 0b00101011_000,
762
ALUOp::SubS => 0b01101011_000,
763
ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,
764
ALUOp::Extr | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
765
ALUOp::SMulH => 0b10011011_010,
766
ALUOp::UMulH => 0b10011011_110,
767
};
768
769
let top11 = top11 | size.sf_bit() << 10;
770
let bit15_10 = match alu_op {
771
ALUOp::SDiv => 0b000011,
772
ALUOp::UDiv => 0b000010,
773
ALUOp::Extr => 0b001011,
774
ALUOp::Lsr => 0b001001,
775
ALUOp::Asr => 0b001010,
776
ALUOp::Lsl => 0b001000,
777
ALUOp::SMulH | ALUOp::UMulH => 0b011111,
778
_ => 0b000000,
779
};
780
debug_assert_ne!(writable_stack_reg(), rd);
781
// The stack pointer is the zero register in this context, so this might be an
782
// indication that something is wrong.
783
debug_assert_ne!(stack_reg(), rn);
784
debug_assert_ne!(stack_reg(), rm);
785
sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
786
}
787
&Inst::AluRRRR {
788
alu_op,
789
size,
790
rd,
791
rm,
792
rn,
793
ra,
794
} => {
795
let (top11, bit15) = match alu_op {
796
ALUOp3::MAdd => (0b0_00_11011_000, 0),
797
ALUOp3::MSub => (0b0_00_11011_000, 1),
798
ALUOp3::UMAddL => {
799
debug_assert!(size == OperandSize::Size32);
800
(0b1_00_11011_1_01, 0)
801
}
802
ALUOp3::SMAddL => {
803
debug_assert!(size == OperandSize::Size32);
804
(0b1_00_11011_0_01, 0)
805
}
806
};
807
let top11 = top11 | size.sf_bit() << 10;
808
sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
809
}
810
&Inst::AluRRImm12 {
811
alu_op,
812
size,
813
rd,
814
rn,
815
ref imm12,
816
} => {
817
let top8 = match alu_op {
818
ALUOp::Add => 0b000_10001,
819
ALUOp::Sub => 0b010_10001,
820
ALUOp::AddS => 0b001_10001,
821
ALUOp::SubS => 0b011_10001,
822
_ => unimplemented!("{:?}", alu_op),
823
};
824
let top8 = top8 | size.sf_bit() << 7;
825
sink.put4(enc_arith_rr_imm12(
826
top8,
827
imm12.shift_bits(),
828
imm12.imm_bits(),
829
rn,
830
rd,
831
));
832
}
833
&Inst::AluRRImmLogic {
834
alu_op,
835
size,
836
rd,
837
rn,
838
ref imml,
839
} => {
840
let (top9, inv) = match alu_op {
841
ALUOp::Orr => (0b001_100100, false),
842
ALUOp::And => (0b000_100100, false),
843
ALUOp::AndS => (0b011_100100, false),
844
ALUOp::Eor => (0b010_100100, false),
845
ALUOp::OrrNot => (0b001_100100, true),
846
ALUOp::AndNot => (0b000_100100, true),
847
ALUOp::EorNot => (0b010_100100, true),
848
_ => unimplemented!("{:?}", alu_op),
849
};
850
let top9 = top9 | size.sf_bit() << 8;
851
let imml = if inv { imml.invert() } else { *imml };
852
sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
853
}
854
855
&Inst::AluRRImmShift {
856
alu_op,
857
size,
858
rd,
859
rn,
860
ref immshift,
861
} => {
862
let amt = immshift.value();
863
let (top10, immr, imms) = match alu_op {
864
ALUOp::Extr => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
865
ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111),
866
ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111),
867
ALUOp::Lsl => {
868
let bits = if size.is64() { 64 } else { 32 };
869
(
870
0b0101001100,
871
u32::from((bits - amt) % bits),
872
u32::from(bits - 1 - amt),
873
)
874
}
875
_ => unimplemented!("{:?}", alu_op),
876
};
877
let top10 = top10 | size.sf_bit() << 9 | size.sf_bit();
878
let imms = match alu_op {
879
ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5,
880
_ => imms,
881
};
882
sink.put4(
883
(top10 << 22)
884
| (immr << 16)
885
| (imms << 10)
886
| (machreg_to_gpr(rn) << 5)
887
| machreg_to_gpr(rd.to_reg()),
888
);
889
}
890
891
&Inst::AluRRRShift {
892
alu_op,
893
size,
894
rd,
895
rn,
896
rm,
897
ref shiftop,
898
} => {
899
let top11: u32 = match alu_op {
900
ALUOp::Add => 0b000_01011000,
901
ALUOp::AddS => 0b001_01011000,
902
ALUOp::Sub => 0b010_01011000,
903
ALUOp::SubS => 0b011_01011000,
904
ALUOp::Orr => 0b001_01010000,
905
ALUOp::And => 0b000_01010000,
906
ALUOp::AndS => 0b011_01010000,
907
ALUOp::Eor => 0b010_01010000,
908
ALUOp::OrrNot => 0b001_01010001,
909
ALUOp::EorNot => 0b010_01010001,
910
ALUOp::AndNot => 0b000_01010001,
911
ALUOp::Extr => 0b000_10011100,
912
_ => unimplemented!("{:?}", alu_op),
913
};
914
let top11 = top11 | size.sf_bit() << 10;
915
let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
916
let bits_15_10 = u32::from(shiftop.amt().value());
917
sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
918
}
919
920
&Inst::AluRRRExtend {
921
alu_op,
922
size,
923
rd,
924
rn,
925
rm,
926
extendop,
927
} => {
928
let top11: u32 = match alu_op {
929
ALUOp::Add => 0b00001011001,
930
ALUOp::Sub => 0b01001011001,
931
ALUOp::AddS => 0b00101011001,
932
ALUOp::SubS => 0b01101011001,
933
_ => unimplemented!("{:?}", alu_op),
934
};
935
let top11 = top11 | size.sf_bit() << 10;
936
let bits_15_10 = u32::from(extendop.bits()) << 3;
937
sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
938
}
939
940
&Inst::BitRR {
941
op, size, rd, rn, ..
942
} => {
943
let (op1, op2) = match op {
944
BitOp::RBit => (0b00000, 0b000000),
945
BitOp::Clz => (0b00000, 0b000100),
946
BitOp::Cls => (0b00000, 0b000101),
947
BitOp::Rev16 => (0b00000, 0b000001),
948
BitOp::Rev32 => (0b00000, 0b000010),
949
BitOp::Rev64 => (0b00000, 0b000011),
950
};
951
sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
952
}
953
954
&Inst::ULoad8 { rd, ref mem, flags }
955
| &Inst::SLoad8 { rd, ref mem, flags }
956
| &Inst::ULoad16 { rd, ref mem, flags }
957
| &Inst::SLoad16 { rd, ref mem, flags }
958
| &Inst::ULoad32 { rd, ref mem, flags }
959
| &Inst::SLoad32 { rd, ref mem, flags }
960
| &Inst::ULoad64 {
961
rd, ref mem, flags, ..
962
}
963
| &Inst::FpuLoad16 { rd, ref mem, flags }
964
| &Inst::FpuLoad32 { rd, ref mem, flags }
965
| &Inst::FpuLoad64 { rd, ref mem, flags }
966
| &Inst::FpuLoad128 { rd, ref mem, flags } => {
967
let mem = mem.clone();
968
let access_ty = self.mem_type().unwrap();
969
let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);
970
971
for inst in mem_insts.into_iter() {
972
inst.emit(sink, emit_info, state);
973
}
974
975
// ldst encoding helpers take Reg, not Writable<Reg>.
976
let rd = rd.to_reg();
977
978
// This is the base opcode (top 10 bits) for the "unscaled
979
// immediate" form (Unscaled). Other addressing modes will OR in
980
// other values for bits 24/25 (bits 1/2 of this constant).
981
let op = match self {
982
Inst::ULoad8 { .. } => 0b0011100001,
983
Inst::SLoad8 { .. } => 0b0011100010,
984
Inst::ULoad16 { .. } => 0b0111100001,
985
Inst::SLoad16 { .. } => 0b0111100010,
986
Inst::ULoad32 { .. } => 0b1011100001,
987
Inst::SLoad32 { .. } => 0b1011100010,
988
Inst::ULoad64 { .. } => 0b1111100001,
989
Inst::FpuLoad16 { .. } => 0b0111110001,
990
Inst::FpuLoad32 { .. } => 0b1011110001,
991
Inst::FpuLoad64 { .. } => 0b1111110001,
992
Inst::FpuLoad128 { .. } => 0b0011110011,
993
_ => unreachable!(),
994
};
995
996
if let Some(trap_code) = flags.trap_code() {
997
// Register the offset at which the actual load instruction starts.
998
sink.add_trap(trap_code);
999
}
1000
1001
match &mem {
1002
&AMode::Unscaled { rn, simm9 } => {
1003
let reg = rn;
1004
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
1005
}
1006
&AMode::UnsignedOffset { rn, uimm12 } => {
1007
let reg = rn;
1008
sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
1009
}
1010
&AMode::RegReg { rn, rm } => {
1011
let r1 = rn;
1012
let r2 = rm;
1013
sink.put4(enc_ldst_reg(
1014
op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
1015
));
1016
}
1017
&AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
1018
let r1 = rn;
1019
let r2 = rm;
1020
let extendop = match &mem {
1021
&AMode::RegScaled { .. } => None,
1022
&AMode::RegScaledExtended { extendop, .. } => Some(extendop),
1023
_ => unreachable!(),
1024
};
1025
sink.put4(enc_ldst_reg(
1026
op, r1, r2, /* scaled = */ true, extendop, rd,
1027
));
1028
}
1029
&AMode::RegExtended { rn, rm, extendop } => {
1030
let r1 = rn;
1031
let r2 = rm;
1032
sink.put4(enc_ldst_reg(
1033
op,
1034
r1,
1035
r2,
1036
/* scaled = */ false,
1037
Some(extendop),
1038
rd,
1039
));
1040
}
1041
&AMode::Label { ref label } => {
1042
let offset = match label {
1043
// cast i32 to u32 (two's-complement)
1044
MemLabel::PCRel(off) => *off as u32,
1045
// Emit a relocation into the `MachBuffer`
1046
// for the label that's being loaded from and
1047
// encode an address of 0 in its place which will
1048
// get filled in by relocation resolution later on.
1049
MemLabel::Mach(label) => {
1050
sink.use_label_at_offset(
1051
sink.cur_offset(),
1052
*label,
1053
LabelUse::Ldr19,
1054
);
1055
0
1056
}
1057
} / 4;
1058
assert!(offset < (1 << 19));
1059
match self {
1060
&Inst::ULoad32 { .. } => {
1061
sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
1062
}
1063
&Inst::SLoad32 { .. } => {
1064
sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
1065
}
1066
&Inst::FpuLoad32 { .. } => {
1067
sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
1068
}
1069
&Inst::ULoad64 { .. } => {
1070
sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
1071
}
1072
&Inst::FpuLoad64 { .. } => {
1073
sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
1074
}
1075
&Inst::FpuLoad128 { .. } => {
1076
sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
1077
}
1078
_ => panic!("Unsupported size for LDR from constant pool!"),
1079
}
1080
}
1081
&AMode::SPPreIndexed { simm9 } => {
1082
let reg = stack_reg();
1083
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
1084
}
1085
&AMode::SPPostIndexed { simm9 } => {
1086
let reg = stack_reg();
1087
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
1088
}
1089
// Eliminated by `mem_finalize()` above.
1090
&AMode::SPOffset { .. }
1091
| &AMode::FPOffset { .. }
1092
| &AMode::IncomingArg { .. }
1093
| &AMode::SlotOffset { .. }
1094
| &AMode::Const { .. }
1095
| &AMode::RegOffset { .. } => {
1096
panic!("Should not see {mem:?} here!")
1097
}
1098
}
1099
}
1100
1101
&Inst::Store8 { rd, ref mem, flags }
1102
| &Inst::Store16 { rd, ref mem, flags }
1103
| &Inst::Store32 { rd, ref mem, flags }
1104
| &Inst::Store64 { rd, ref mem, flags }
1105
| &Inst::FpuStore16 { rd, ref mem, flags }
1106
| &Inst::FpuStore32 { rd, ref mem, flags }
1107
| &Inst::FpuStore64 { rd, ref mem, flags }
1108
| &Inst::FpuStore128 { rd, ref mem, flags } => {
1109
let mem = mem.clone();
1110
let access_ty = self.mem_type().unwrap();
1111
let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);
1112
1113
for inst in mem_insts.into_iter() {
1114
inst.emit(sink, emit_info, state);
1115
}
1116
1117
let op = match self {
1118
Inst::Store8 { .. } => 0b0011100000,
1119
Inst::Store16 { .. } => 0b0111100000,
1120
Inst::Store32 { .. } => 0b1011100000,
1121
Inst::Store64 { .. } => 0b1111100000,
1122
Inst::FpuStore16 { .. } => 0b0111110000,
1123
Inst::FpuStore32 { .. } => 0b1011110000,
1124
Inst::FpuStore64 { .. } => 0b1111110000,
1125
Inst::FpuStore128 { .. } => 0b0011110010,
1126
_ => unreachable!(),
1127
};
1128
1129
if let Some(trap_code) = flags.trap_code() {
1130
// Register the offset at which the actual store instruction starts.
1131
sink.add_trap(trap_code);
1132
}
1133
1134
match &mem {
1135
&AMode::Unscaled { rn, simm9 } => {
1136
let reg = rn;
1137
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
1138
}
1139
&AMode::UnsignedOffset { rn, uimm12 } => {
1140
let reg = rn;
1141
sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
1142
}
1143
&AMode::RegReg { rn, rm } => {
1144
let r1 = rn;
1145
let r2 = rm;
1146
sink.put4(enc_ldst_reg(
1147
op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
1148
));
1149
}
1150
&AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
1151
let r1 = rn;
1152
let r2 = rm;
1153
let extendop = match &mem {
1154
&AMode::RegScaled { .. } => None,
1155
&AMode::RegScaledExtended { extendop, .. } => Some(extendop),
1156
_ => unreachable!(),
1157
};
1158
sink.put4(enc_ldst_reg(
1159
op, r1, r2, /* scaled = */ true, extendop, rd,
1160
));
1161
}
1162
&AMode::RegExtended { rn, rm, extendop } => {
1163
let r1 = rn;
1164
let r2 = rm;
1165
sink.put4(enc_ldst_reg(
1166
op,
1167
r1,
1168
r2,
1169
/* scaled = */ false,
1170
Some(extendop),
1171
rd,
1172
));
1173
}
1174
&AMode::Label { .. } => {
1175
panic!("Store to a MemLabel not implemented!");
1176
}
1177
&AMode::SPPreIndexed { simm9 } => {
1178
let reg = stack_reg();
1179
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
1180
}
1181
&AMode::SPPostIndexed { simm9 } => {
1182
let reg = stack_reg();
1183
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
1184
}
1185
// Eliminated by `mem_finalize()` above.
1186
&AMode::SPOffset { .. }
1187
| &AMode::FPOffset { .. }
1188
| &AMode::IncomingArg { .. }
1189
| &AMode::SlotOffset { .. }
1190
| &AMode::Const { .. }
1191
| &AMode::RegOffset { .. } => {
1192
panic!("Should not see {mem:?} here!")
1193
}
1194
}
1195
}
1196
1197
&Inst::StoreP64 {
1198
rt,
1199
rt2,
1200
ref mem,
1201
flags,
1202
} => {
1203
let mem = mem.clone();
1204
if let Some(trap_code) = flags.trap_code() {
1205
// Register the offset at which the actual store instruction starts.
1206
sink.add_trap(trap_code);
1207
}
1208
match &mem {
1209
&PairAMode::SignedOffset { reg, simm7 } => {
1210
assert_eq!(simm7.scale_ty, I64);
1211
sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
1212
}
1213
&PairAMode::SPPreIndexed { simm7 } => {
1214
assert_eq!(simm7.scale_ty, I64);
1215
let reg = stack_reg();
1216
sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
1217
}
1218
&PairAMode::SPPostIndexed { simm7 } => {
1219
assert_eq!(simm7.scale_ty, I64);
1220
let reg = stack_reg();
1221
sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
1222
}
1223
}
1224
}
1225
&Inst::LoadP64 {
1226
rt,
1227
rt2,
1228
ref mem,
1229
flags,
1230
} => {
1231
let rt = rt.to_reg();
1232
let rt2 = rt2.to_reg();
1233
let mem = mem.clone();
1234
if let Some(trap_code) = flags.trap_code() {
1235
// Register the offset at which the actual load instruction starts.
1236
sink.add_trap(trap_code);
1237
}
1238
1239
match &mem {
1240
&PairAMode::SignedOffset { reg, simm7 } => {
1241
assert_eq!(simm7.scale_ty, I64);
1242
sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
1243
}
1244
&PairAMode::SPPreIndexed { simm7 } => {
1245
assert_eq!(simm7.scale_ty, I64);
1246
let reg = stack_reg();
1247
sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
1248
}
1249
&PairAMode::SPPostIndexed { simm7 } => {
1250
assert_eq!(simm7.scale_ty, I64);
1251
let reg = stack_reg();
1252
sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
1253
}
1254
}
1255
}
1256
&Inst::FpuLoadP64 {
1257
rt,
1258
rt2,
1259
ref mem,
1260
flags,
1261
}
1262
| &Inst::FpuLoadP128 {
1263
rt,
1264
rt2,
1265
ref mem,
1266
flags,
1267
} => {
1268
let rt = rt.to_reg();
1269
let rt2 = rt2.to_reg();
1270
let mem = mem.clone();
1271
1272
if let Some(trap_code) = flags.trap_code() {
1273
// Register the offset at which the actual load instruction starts.
1274
sink.add_trap(trap_code);
1275
}
1276
1277
let opc = match self {
1278
&Inst::FpuLoadP64 { .. } => 0b01,
1279
&Inst::FpuLoadP128 { .. } => 0b10,
1280
_ => unreachable!(),
1281
};
1282
1283
match &mem {
1284
&PairAMode::SignedOffset { reg, simm7 } => {
1285
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1286
sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
1287
}
1288
&PairAMode::SPPreIndexed { simm7 } => {
1289
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1290
let reg = stack_reg();
1291
sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
1292
}
1293
&PairAMode::SPPostIndexed { simm7 } => {
1294
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1295
let reg = stack_reg();
1296
sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
1297
}
1298
}
1299
}
1300
&Inst::FpuStoreP64 {
1301
rt,
1302
rt2,
1303
ref mem,
1304
flags,
1305
}
1306
| &Inst::FpuStoreP128 {
1307
rt,
1308
rt2,
1309
ref mem,
1310
flags,
1311
} => {
1312
let mem = mem.clone();
1313
1314
if let Some(trap_code) = flags.trap_code() {
1315
// Register the offset at which the actual store instruction starts.
1316
sink.add_trap(trap_code);
1317
}
1318
1319
let opc = match self {
1320
&Inst::FpuStoreP64 { .. } => 0b01,
1321
&Inst::FpuStoreP128 { .. } => 0b10,
1322
_ => unreachable!(),
1323
};
1324
1325
match &mem {
1326
&PairAMode::SignedOffset { reg, simm7 } => {
1327
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1328
sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
1329
}
1330
&PairAMode::SPPreIndexed { simm7 } => {
1331
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1332
let reg = stack_reg();
1333
sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
1334
}
1335
&PairAMode::SPPostIndexed { simm7 } => {
1336
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1337
let reg = stack_reg();
1338
sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
1339
}
1340
}
1341
}
1342
&Inst::Mov { size, rd, rm } => {
1343
assert!(rd.to_reg().class() == rm.class());
1344
assert!(rm.class() == RegClass::Int);
1345
1346
match size {
1347
OperandSize::Size64 => {
1348
// MOV to SP is interpreted as MOV to XZR instead. And our codegen
1349
// should never MOV to XZR.
1350
assert!(rd.to_reg() != stack_reg());
1351
1352
if rm == stack_reg() {
1353
// We can't use ORR here, so use an `add rd, sp, #0` instead.
1354
let imm12 = Imm12::maybe_from_u64(0).unwrap();
1355
sink.put4(enc_arith_rr_imm12(
1356
0b100_10001,
1357
imm12.shift_bits(),
1358
imm12.imm_bits(),
1359
rm,
1360
rd,
1361
));
1362
} else {
1363
// Encoded as ORR rd, rm, zero.
1364
sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
1365
}
1366
}
1367
OperandSize::Size32 => {
1368
// MOV to SP is interpreted as MOV to XZR instead. And our codegen
1369
// should never MOV to XZR.
1370
assert!(machreg_to_gpr(rd.to_reg()) != 31);
1371
// Encoded as ORR rd, rm, zero.
1372
sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
1373
}
1374
}
1375
}
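// For example, `mov x0, sp` becomes `add x0, sp, #0` (0x910003E0), while
// `mov x0, x1` becomes `orr x0, xzr, x1` (0xAA0103E0).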
1376
&Inst::MovFromPReg { rd, rm } => {
1377
let rm: Reg = rm.into();
1378
debug_assert!(
1379
[
1380
regs::fp_reg(),
1381
regs::stack_reg(),
1382
regs::link_reg(),
1383
regs::pinned_reg()
1384
]
1385
.contains(&rm)
1386
);
1387
assert!(rm.class() == RegClass::Int);
1388
assert!(rd.to_reg().class() == rm.class());
1389
let size = OperandSize::Size64;
1390
Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
1391
}
1392
&Inst::MovToPReg { rd, rm } => {
1393
let rd: Writable<Reg> = Writable::from_reg(rd.into());
1394
debug_assert!(
1395
[
1396
regs::fp_reg(),
1397
regs::stack_reg(),
1398
regs::link_reg(),
1399
regs::pinned_reg()
1400
]
1401
.contains(&rd.to_reg())
1402
);
1403
assert!(rd.to_reg().class() == RegClass::Int);
1404
assert!(rm.class() == rd.to_reg().class());
1405
let size = OperandSize::Size64;
1406
Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
1407
}
1408
&Inst::MovWide { op, rd, imm, size } => {
1409
sink.put4(enc_move_wide(op, rd, imm, size));
1410
}
1411
&Inst::MovK { rd, rn, imm, size } => {
1412
debug_assert_eq!(rn, rd.to_reg());
1413
sink.put4(enc_movk(rd, imm, size));
1414
}
1415
&Inst::CSel { rd, rn, rm, cond } => {
1416
sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
1417
}
1418
&Inst::CSNeg { rd, rn, rm, cond } => {
1419
sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
1420
}
1421
&Inst::CSet { rd, cond } => {
1422
sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
1423
}
1424
&Inst::CSetm { rd, cond } => {
1425
sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
1426
}
1427
&Inst::CCmp {
1428
size,
1429
rn,
1430
rm,
1431
nzcv,
1432
cond,
1433
} => {
1434
sink.put4(enc_ccmp(size, rn, rm, nzcv, cond));
1435
}
1436
&Inst::CCmpImm {
1437
size,
1438
rn,
1439
imm,
1440
nzcv,
1441
cond,
1442
} => {
1443
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
1444
}
1445
&Inst::AtomicRMW {
1446
ty,
1447
op,
1448
rs,
1449
rt,
1450
rn,
1451
flags,
1452
} => {
1453
if let Some(trap_code) = flags.trap_code() {
1454
sink.add_trap(trap_code);
1455
}
1456
1457
sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
1458
}
1459
&Inst::AtomicRMWLoop { ty, op, flags, .. } => {
1460
/* Emit this:
1461
again:
1462
ldaxr{,b,h} x/w27, [x25]
1463
// maybe sign extend
1464
op x28, x27, x26 // op is add,sub,and,orr,eor
1465
stlxr{,b,h} w24, x/w28, [x25]
1466
cbnz x24, again
1467
1468
Operand conventions:
1469
IN: x25 (addr), x26 (2nd arg for op)
1470
OUT: x27 (old value), x24 (trashed), x28 (trashed)
1471
1472
It is unfortunate that, per the ARM documentation, x28 cannot be used for
1473
both the store-data and success-flag operands of stlxr. This causes the
1474
instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
1475
instead for the success-flag.
1476
*/
1477
// TODO: We should not hardcode registers here; a better idea would be to
1478
// pass some scratch registers in the AtomicRMWLoop pseudo-instruction and use those.
1479
let xzr = zero_reg();
1480
let x24 = xreg(24);
1481
let x25 = xreg(25);
1482
let x26 = xreg(26);
1483
let x27 = xreg(27);
1484
let x28 = xreg(28);
1485
let x24wr = writable_xreg(24);
1486
let x27wr = writable_xreg(27);
1487
let x28wr = writable_xreg(28);
1488
let again_label = sink.get_label();
1489
1490
// again:
1491
sink.bind_label(again_label, &mut state.ctrl_plane);
1492
1493
if let Some(trap_code) = flags.trap_code() {
1494
sink.add_trap(trap_code);
1495
}
1496
1497
sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
1498
let size = OperandSize::from_ty(ty);
1499
let sign_ext = match op {
1500
AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
1501
I16 => Some((ExtendOp::SXTH, 16)),
1502
I8 => Some((ExtendOp::SXTB, 8)),
1503
_ => None,
1504
},
1505
_ => None,
1506
};
1507
1508
// sxt{b|h} the loaded result if necessary.
1509
if sign_ext.is_some() {
1510
let (_, from_bits) = sign_ext.unwrap();
1511
Inst::Extend {
1512
rd: x27wr,
1513
rn: x27,
1514
signed: true,
1515
from_bits,
1516
to_bits: size.bits(),
1517
}
1518
.emit(sink, emit_info, state);
1519
}
1520
1521
match op {
1522
AtomicRMWLoopOp::Xchg => {} // do nothing
1523
AtomicRMWLoopOp::Nand => {
1524
// and x28, x27, x26
1525
// mvn x28, x28
1526
1527
Inst::AluRRR {
1528
alu_op: ALUOp::And,
1529
size,
1530
rd: x28wr,
1531
rn: x27,
1532
rm: x26,
1533
}
1534
.emit(sink, emit_info, state);
1535
1536
Inst::AluRRR {
1537
alu_op: ALUOp::OrrNot,
1538
size,
1539
rd: x28wr,
1540
rn: xzr,
1541
rm: x28,
1542
}
1543
.emit(sink, emit_info, state);
1544
}
1545
AtomicRMWLoopOp::Umin
1546
| AtomicRMWLoopOp::Umax
1547
| AtomicRMWLoopOp::Smin
1548
| AtomicRMWLoopOp::Smax => {
1549
// cmp x27, x26 {?sxt}
1550
// csel.op x28, x27, x26
1551
1552
let cond = match op {
1553
AtomicRMWLoopOp::Umin => Cond::Lo,
1554
AtomicRMWLoopOp::Umax => Cond::Hi,
1555
AtomicRMWLoopOp::Smin => Cond::Lt,
1556
AtomicRMWLoopOp::Smax => Cond::Gt,
1557
_ => unreachable!(),
1558
};
1559
1560
if sign_ext.is_some() {
1561
let (extendop, _) = sign_ext.unwrap();
1562
Inst::AluRRRExtend {
1563
alu_op: ALUOp::SubS,
1564
size,
1565
rd: writable_zero_reg(),
1566
rn: x27,
1567
rm: x26,
1568
extendop,
1569
}
1570
.emit(sink, emit_info, state);
1571
} else {
1572
Inst::AluRRR {
1573
alu_op: ALUOp::SubS,
1574
size,
1575
rd: writable_zero_reg(),
1576
rn: x27,
1577
rm: x26,
1578
}
1579
.emit(sink, emit_info, state);
1580
}
1581
1582
Inst::CSel {
1583
cond,
1584
rd: x28wr,
1585
rn: x27,
1586
rm: x26,
1587
}
1588
.emit(sink, emit_info, state);
1589
}
1590
_ => {
1591
// add/sub/and/orr/eor x28, x27, x26
1592
let alu_op = match op {
1593
AtomicRMWLoopOp::Add => ALUOp::Add,
1594
AtomicRMWLoopOp::Sub => ALUOp::Sub,
1595
AtomicRMWLoopOp::And => ALUOp::And,
1596
AtomicRMWLoopOp::Orr => ALUOp::Orr,
1597
AtomicRMWLoopOp::Eor => ALUOp::Eor,
1598
AtomicRMWLoopOp::Nand
1599
| AtomicRMWLoopOp::Umin
1600
| AtomicRMWLoopOp::Umax
1601
| AtomicRMWLoopOp::Smin
1602
| AtomicRMWLoopOp::Smax
1603
| AtomicRMWLoopOp::Xchg => unreachable!(),
1604
};
1605
1606
Inst::AluRRR {
1607
alu_op,
1608
size,
1609
rd: x28wr,
1610
rn: x27,
1611
rm: x26,
1612
}
1613
.emit(sink, emit_info, state);
1614
}
1615
}
1616
1617
if let Some(trap_code) = flags.trap_code() {
1618
sink.add_trap(trap_code);
1619
}
1620
if op == AtomicRMWLoopOp::Xchg {
1621
sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
1622
} else {
1623
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1624
}
1625
1626
// cbnz w24, again
1627
// Note, we're actually testing x24, and relying on the default zero-high-half
1628
// rule in the assignment that `stlxr` does.
1629
let br_offset = sink.cur_offset();
1630
sink.put4(enc_conditional_br(
1631
BranchTarget::Label(again_label),
1632
CondBrKind::NotZero(x24, OperandSize::Size64),
1633
));
1634
sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
1635
}
1636
&Inst::AtomicCAS {
1637
rd,
1638
rs,
1639
rt,
1640
rn,
1641
ty,
1642
flags,
1643
} => {
1644
debug_assert_eq!(rd.to_reg(), rs);
1645
let size = match ty {
1646
I8 => 0b00,
1647
I16 => 0b01,
1648
I32 => 0b10,
1649
I64 => 0b11,
1650
_ => panic!("Unsupported type: {ty}"),
1651
};
1652
1653
if let Some(trap_code) = flags.trap_code() {
1654
sink.add_trap(trap_code);
1655
}
1656
1657
sink.put4(enc_cas(size, rd, rt, rn));
1658
}
1659
&Inst::AtomicCASLoop { ty, flags, .. } => {
1660
/* Emit this:
1661
again:
1662
ldaxr{,b,h} x/w27, [x25]
1663
cmp x27, x/w26 uxt{b,h}
1664
b.ne out
1665
stlxr{,b,h} w24, x/w28, [x25]
1666
cbnz x24, again
1667
out:
1668
1669
Operand conventions:
1670
IN: x25 (addr), x26 (expected value), x28 (replacement value)
1671
OUT: x27 (old value), x24 (trashed)
1672
*/
1673
let x24 = xreg(24);
1674
let x25 = xreg(25);
1675
let x26 = xreg(26);
1676
let x27 = xreg(27);
1677
let x28 = xreg(28);
1678
let xzrwr = writable_zero_reg();
1679
let x24wr = writable_xreg(24);
1680
let x27wr = writable_xreg(27);
1681
let again_label = sink.get_label();
1682
let out_label = sink.get_label();
1683
1684
// again:
1685
sink.bind_label(again_label, &mut state.ctrl_plane);
1686
1687
if let Some(trap_code) = flags.trap_code() {
1688
sink.add_trap(trap_code);
1689
}
1690
1691
// ldaxr x27, [x25]
1692
sink.put4(enc_ldaxr(ty, x27wr, x25));
1693
1694
// The top 32-bits are zero-extended by the ldaxr so we don't
1695
// have to use UXTW, just the x-form of the register.
1696
let (bit21, extend_op) = match ty {
1697
I8 => (0b1, 0b000000),
1698
I16 => (0b1, 0b001000),
1699
_ => (0b0, 0b000000),
1700
};
1701
let bits_31_21 = 0b111_01011_000 | bit21;
1702
// cmp x27, x26 (== subs xzr, x27, x26)
1703
sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));
1704
1705
// b.ne out
1706
let br_out_offset = sink.cur_offset();
1707
sink.put4(enc_conditional_br(
1708
BranchTarget::Label(out_label),
1709
CondBrKind::Cond(Cond::Ne),
1710
));
1711
sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
1712
1713
if let Some(trap_code) = flags.trap_code() {
1714
sink.add_trap(trap_code);
1715
}
1716
1717
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1718
1719
// cbnz w24, again.
1720
// Note, we're actually testing x24, and relying on the default zero-high-half
1721
// rule in the assignment that `stlxr` does.
1722
let br_again_offset = sink.cur_offset();
1723
sink.put4(enc_conditional_br(
1724
BranchTarget::Label(again_label),
1725
CondBrKind::NotZero(x24, OperandSize::Size64),
1726
));
1727
sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
1728
1729
// out:
1730
sink.bind_label(out_label, &mut state.ctrl_plane);
1731
}
1732
&Inst::LoadAcquire {
1733
access_ty,
1734
rt,
1735
rn,
1736
flags,
1737
} => {
1738
if let Some(trap_code) = flags.trap_code() {
1739
sink.add_trap(trap_code);
1740
}
1741
1742
sink.put4(enc_ldar(access_ty, rt, rn));
1743
}
1744
&Inst::StoreRelease {
1745
access_ty,
1746
rt,
1747
rn,
1748
flags,
1749
} => {
1750
if let Some(trap_code) = flags.trap_code() {
1751
sink.add_trap(trap_code);
1752
}
1753
1754
sink.put4(enc_stlr(access_ty, rt, rn));
1755
}
1756
&Inst::Fence {} => {
1757
sink.put4(enc_dmb_ish()); // dmb ish
1758
}
1759
&Inst::Csdb {} => {
1760
sink.put4(0xd503229f);
1761
}
1762
&Inst::FpuMove32 { rd, rn } => {
1763
sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn));
1764
}
1765
&Inst::FpuMove64 { rd, rn } => {
1766
sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
1767
}
1768
&Inst::FpuMove128 { rd, rn } => {
1769
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
1770
}
1771
&Inst::FpuMoveFromVec { rd, rn, idx, size } => {
1772
let (imm5, shift, mask) = match size.lane_size() {
1773
ScalarSize::Size32 => (0b00100, 3, 0b011),
1774
ScalarSize::Size64 => (0b01000, 4, 0b001),
1775
_ => unimplemented!(),
1776
};
1777
debug_assert_eq!(idx & mask, idx);
1778
let imm5 = imm5 | ((idx as u32) << shift);
1779
sink.put4(
1780
0b010_11110000_00000_000001_00000_00000
1781
| (imm5 << 16)
1782
| (machreg_to_vec(rn) << 5)
1783
| machreg_to_vec(rd.to_reg()),
1784
);
1785
}
1786
&Inst::FpuExtend { rd, rn, size } => {
1787
sink.put4(enc_fpurr(
1788
0b000_11110_00_1_000000_10000 | (size.ftype() << 12),
1789
rd,
1790
rn,
1791
));
1792
}
1793
&Inst::FpuRR {
1794
fpu_op,
1795
size,
1796
rd,
1797
rn,
1798
} => {
1799
let top22 = match fpu_op {
1800
FPUOp1::Abs => 0b000_11110_00_1_000001_10000,
1801
FPUOp1::Neg => 0b000_11110_00_1_000010_10000,
1802
FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000,
1803
FPUOp1::Cvt32To64 => {
1804
debug_assert_eq!(size, ScalarSize::Size32);
1805
0b000_11110_00_1_000101_10000
1806
}
1807
FPUOp1::Cvt64To32 => {
1808
debug_assert_eq!(size, ScalarSize::Size64);
1809
0b000_11110_01_1_000100_10000
1810
}
1811
};
1812
let top22 = top22 | size.ftype() << 12;
1813
sink.put4(enc_fpurr(top22, rd, rn));
1814
}
1815
&Inst::FpuRRR {
1816
fpu_op,
1817
size,
1818
rd,
1819
rn,
1820
rm,
1821
} => {
1822
let top22 = match fpu_op {
1823
FPUOp2::Add => 0b000_11110_00_1_00000_001010,
1824
FPUOp2::Sub => 0b000_11110_00_1_00000_001110,
1825
FPUOp2::Mul => 0b000_11110_00_1_00000_000010,
1826
FPUOp2::Div => 0b000_11110_00_1_00000_000110,
1827
FPUOp2::Max => 0b000_11110_00_1_00000_010010,
1828
FPUOp2::Min => 0b000_11110_00_1_00000_010110,
1829
};
1830
let top22 = top22 | size.ftype() << 12;
1831
sink.put4(enc_fpurrr(top22, rd, rn, rm));
1832
}
1833
&Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
1834
FPUOpRI::UShr32(imm) => {
1835
debug_assert_eq!(32, imm.lane_size_in_bits);
1836
sink.put4(
1837
0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
1838
| imm.enc() << 16
1839
| machreg_to_vec(rn) << 5
1840
| machreg_to_vec(rd.to_reg()),
1841
)
1842
}
1843
FPUOpRI::UShr64(imm) => {
1844
debug_assert_eq!(64, imm.lane_size_in_bits);
1845
sink.put4(
1846
0b01_1_111110_0000000_00_0_0_0_1_00000_00000
1847
| imm.enc() << 16
1848
| machreg_to_vec(rn) << 5
1849
| machreg_to_vec(rd.to_reg()),
1850
)
1851
}
1852
},
1853
&Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
1854
debug_assert_eq!(rd.to_reg(), ri);
1855
match fpu_op {
1856
FPUOpRIMod::Sli64(imm) => {
1857
debug_assert_eq!(64, imm.lane_size_in_bits);
1858
sink.put4(
1859
0b01_1_111110_0000000_010101_00000_00000
1860
| imm.enc() << 16
1861
| machreg_to_vec(rn) << 5
1862
| machreg_to_vec(rd.to_reg()),
1863
)
1864
}
1865
FPUOpRIMod::Sli32(imm) => {
1866
debug_assert_eq!(32, imm.lane_size_in_bits);
1867
sink.put4(
1868
0b0_0_1_011110_0000000_010101_00000_00000
1869
| imm.enc() << 16
1870
| machreg_to_vec(rn) << 5
1871
| machreg_to_vec(rd.to_reg()),
1872
)
1873
}
1874
}
1875
}
1876
&Inst::FpuRRRR {
1877
fpu_op,
1878
size,
1879
rd,
1880
rn,
1881
rm,
1882
ra,
1883
} => {
1884
let top17 = match fpu_op {
1885
FPUOp3::MAdd => 0b000_11111_00_0_00000_0,
1886
FPUOp3::MSub => 0b000_11111_00_0_00000_1,
1887
FPUOp3::NMAdd => 0b000_11111_00_1_00000_0,
1888
FPUOp3::NMSub => 0b000_11111_00_1_00000_1,
1889
};
1890
let top17 = top17 | size.ftype() << 7;
1891
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
1892
}
1893
&Inst::VecMisc { op, rd, rn, size } => {
1894
let (q, enc_size) = size.enc_size();
1895
let (u, bits_12_16, size) = match op {
1896
VecMisc2::Not => (0b1, 0b00101, 0b00),
1897
VecMisc2::Neg => (0b1, 0b01011, enc_size),
1898
VecMisc2::Abs => (0b0, 0b01011, enc_size),
1899
VecMisc2::Fabs => {
1900
debug_assert!(
1901
size == VectorSize::Size32x2
1902
|| size == VectorSize::Size32x4
1903
|| size == VectorSize::Size64x2
1904
);
1905
(0b0, 0b01111, enc_size)
1906
}
1907
VecMisc2::Fneg => {
1908
debug_assert!(
1909
size == VectorSize::Size32x2
1910
|| size == VectorSize::Size32x4
1911
|| size == VectorSize::Size64x2
1912
);
1913
(0b1, 0b01111, enc_size)
1914
}
1915
VecMisc2::Fsqrt => {
1916
debug_assert!(
1917
size == VectorSize::Size32x2
1918
|| size == VectorSize::Size32x4
1919
|| size == VectorSize::Size64x2
1920
);
1921
(0b1, 0b11111, enc_size)
1922
}
1923
VecMisc2::Rev16 => {
1924
debug_assert_eq!(size, VectorSize::Size8x16);
1925
(0b0, 0b00001, enc_size)
1926
}
1927
VecMisc2::Rev32 => {
1928
debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8);
1929
(0b1, 0b00000, enc_size)
1930
}
1931
VecMisc2::Rev64 => {
1932
debug_assert!(
1933
size == VectorSize::Size8x16
1934
|| size == VectorSize::Size16x8
1935
|| size == VectorSize::Size32x4
1936
);
1937
(0b0, 0b00000, enc_size)
1938
}
1939
VecMisc2::Fcvtzs => {
1940
debug_assert!(
1941
size == VectorSize::Size32x2
1942
|| size == VectorSize::Size32x4
1943
|| size == VectorSize::Size64x2
1944
);
1945
(0b0, 0b11011, enc_size)
1946
}
1947
VecMisc2::Fcvtzu => {
1948
debug_assert!(
1949
size == VectorSize::Size32x2
1950
|| size == VectorSize::Size32x4
1951
|| size == VectorSize::Size64x2
1952
);
1953
(0b1, 0b11011, enc_size)
1954
}
1955
VecMisc2::Scvtf => {
1956
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1957
(0b0, 0b11101, enc_size & 0b1)
1958
}
1959
VecMisc2::Ucvtf => {
1960
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1961
(0b1, 0b11101, enc_size & 0b1)
1962
}
1963
VecMisc2::Frintn => {
1964
debug_assert!(
1965
size == VectorSize::Size32x2
1966
|| size == VectorSize::Size32x4
1967
|| size == VectorSize::Size64x2
1968
);
1969
(0b0, 0b11000, enc_size & 0b01)
1970
}
1971
VecMisc2::Frintz => {
1972
debug_assert!(
1973
size == VectorSize::Size32x2
1974
|| size == VectorSize::Size32x4
1975
|| size == VectorSize::Size64x2
1976
);
1977
(0b0, 0b11001, enc_size)
1978
}
1979
VecMisc2::Frintm => {
1980
debug_assert!(
1981
size == VectorSize::Size32x2
1982
|| size == VectorSize::Size32x4
1983
|| size == VectorSize::Size64x2
1984
);
1985
(0b0, 0b11001, enc_size & 0b01)
1986
}
1987
VecMisc2::Frintp => {
1988
debug_assert!(
1989
size == VectorSize::Size32x2
1990
|| size == VectorSize::Size32x4
1991
|| size == VectorSize::Size64x2
1992
);
1993
(0b0, 0b11000, enc_size)
1994
}
1995
VecMisc2::Cnt => {
1996
debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
1997
(0b0, 0b00101, enc_size)
1998
}
1999
VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
2000
VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),
2001
VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),
2002
VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),
2003
VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),
2004
VecMisc2::Fcmeq0 => {
2005
debug_assert!(
2006
size == VectorSize::Size32x2
2007
|| size == VectorSize::Size32x4
2008
|| size == VectorSize::Size64x2
2009
);
2010
(0b0, 0b01101, enc_size)
2011
}
2012
VecMisc2::Fcmge0 => {
2013
debug_assert!(
2014
size == VectorSize::Size32x2
2015
|| size == VectorSize::Size32x4
2016
|| size == VectorSize::Size64x2
2017
);
2018
(0b1, 0b01100, enc_size)
2019
}
2020
VecMisc2::Fcmgt0 => {
2021
debug_assert!(
2022
size == VectorSize::Size32x2
2023
|| size == VectorSize::Size32x4
2024
|| size == VectorSize::Size64x2
2025
);
2026
(0b0, 0b01100, enc_size)
2027
}
2028
VecMisc2::Fcmle0 => {
2029
debug_assert!(
2030
size == VectorSize::Size32x2
2031
|| size == VectorSize::Size32x4
2032
|| size == VectorSize::Size64x2
2033
);
2034
(0b1, 0b01101, enc_size)
2035
}
2036
VecMisc2::Fcmlt0 => {
2037
debug_assert!(
2038
size == VectorSize::Size32x2
2039
|| size == VectorSize::Size32x4
2040
|| size == VectorSize::Size64x2
2041
);
2042
(0b0, 0b01110, enc_size)
2043
}
2044
};
2045
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
2046
}
2047
&Inst::VecLanes { op, rd, rn, size } => {
2048
let (q, size) = match size {
2049
VectorSize::Size8x8 => (0b0, 0b00),
2050
VectorSize::Size8x16 => (0b1, 0b00),
2051
VectorSize::Size16x4 => (0b0, 0b01),
2052
VectorSize::Size16x8 => (0b1, 0b01),
2053
VectorSize::Size32x4 => (0b1, 0b10),
2054
_ => unreachable!(),
2055
};
2056
let (u, opcode) = match op {
2057
VecLanesOp::Uminv => (0b1, 0b11010),
2058
VecLanesOp::Addv => (0b0, 0b11011),
2059
};
2060
sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
2061
}
2062
&Inst::VecShiftImm {
2063
op,
2064
rd,
2065
rn,
2066
size,
2067
imm,
2068
} => {
2069
let (is_shr, mut template) = match op {
2070
VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
2071
VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
2072
VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
2073
};
2074
if size.is_128bits() {
2075
template |= 0b1 << 30;
2076
}
2077
let imm = imm as u32;
2078
// Deal with the somewhat strange encoding scheme for, and limits on,
2079
// the shift amount.
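// Worked example (derived from the cases below): SSHR by 3 on 32-bit lanes
// gives immh:immb = 0b0100_000 | (32 - 3) = 0b0111101, while SHL by 3 on the
// same lane size gives 0b0100_000 | 3 = 0b0100011; out-of-range amounts have
// no encoding and hit the panic below.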
2080
let immh_immb = match (size.lane_size(), is_shr) {
2081
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2082
0b_1000_000_u32 | (64 - imm)
2083
}
2084
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2085
0b_0100_000_u32 | (32 - imm)
2086
}
2087
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2088
0b_0010_000_u32 | (16 - imm)
2089
}
2090
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2091
0b_0001_000_u32 | (8 - imm)
2092
}
2093
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2094
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2095
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2096
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2097
_ => panic!(
2098
"aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
2099
),
2100
};
2101
let rn_enc = machreg_to_vec(rn);
2102
let rd_enc = machreg_to_vec(rd.to_reg());
2103
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2104
}
2105
&Inst::VecShiftImmMod {
2106
op,
2107
rd,
2108
ri,
2109
rn,
2110
size,
2111
imm,
2112
} => {
2113
debug_assert_eq!(rd.to_reg(), ri);
2114
let (is_shr, mut template) = match op {
2115
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
2116
};
2117
if size.is_128bits() {
2118
template |= 0b1 << 30;
2119
}
2120
let imm = imm as u32;
2121
// Deal with the somewhat strange encoding scheme for, and limits on,
2122
// the shift amount.
2123
let immh_immb = match (size.lane_size(), is_shr) {
2124
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2125
0b_1000_000_u32 | (64 - imm)
2126
}
2127
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2128
0b_0100_000_u32 | (32 - imm)
2129
}
2130
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2131
0b_0010_000_u32 | (16 - imm)
2132
}
2133
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2134
0b_0001_000_u32 | (8 - imm)
2135
}
2136
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2137
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2138
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2139
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2140
_ => panic!(
2141
"aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
2142
),
2143
};
2144
let rn_enc = machreg_to_vec(rn);
2145
let rd_enc = machreg_to_vec(rd.to_reg());
2146
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2147
}
2148
&Inst::VecExtract { rd, rn, rm, imm4 } => {
2149
if imm4 < 16 {
2150
let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
2151
let rm_enc = machreg_to_vec(rm);
2152
let rn_enc = machreg_to_vec(rn);
2153
let rd_enc = machreg_to_vec(rd.to_reg());
2154
sink.put4(
2155
template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
2156
);
2157
} else {
2158
panic!("aarch64: Inst::VecExtract: emit: invalid extract index {imm4}");
2159
}
2160
}
2161
&Inst::VecTbl { rd, rn, rm } => {
2162
sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));
2163
}
2164
&Inst::VecTblExt { rd, ri, rn, rm } => {
2165
debug_assert_eq!(rd.to_reg(), ri);
2166
sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));
2167
}
2168
&Inst::VecTbl2 { rd, rn, rn2, rm } => {
2169
assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2170
sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));
2171
}
2172
&Inst::VecTbl2Ext {
2173
rd,
2174
ri,
2175
rn,
2176
rn2,
2177
rm,
2178
} => {
2179
debug_assert_eq!(rd.to_reg(), ri);
2180
assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2181
sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));
2182
}
2183
&Inst::FpuCmp { size, rn, rm } => {
2184
sink.put4(enc_fcmp(size, rn, rm));
2185
}
2186
&Inst::FpuToInt { op, rd, rn } => {
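// Field-layout sketch (not from the source): `top16` covers bits 31..16 as
// sf | 0 | S | 11110 | type | 1 | rmode | opcode. For instance,
// 0b100_11110_01_1_11_000 below selects sf=1 (64-bit Xd), type=01 (double),
// rmode=11 (toward zero) and opcode=000, i.e. FCVTZS Xd, Dn.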
2187
let top16 = match op {
2188
// FCVTZS (32/32-bit)
2189
FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
2190
// FCVTZU (32/32-bit)
2191
FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
2192
// FCVTZS (32/64-bit)
2193
FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
2194
// FCVTZU (32/64-bit)
2195
FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
2196
// FCVTZS (64/32-bit)
2197
FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
2198
// FCVTZU (64/32-bit)
2199
FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
2200
// FCVTZS (64/64-bit)
2201
FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
2202
// FCVTZU (64/64-bit)
2203
FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
2204
};
2205
sink.put4(enc_fputoint(top16, rd, rn));
2206
}
2207
&Inst::IntToFpu { op, rd, rn } => {
2208
let top16 = match op {
2209
// SCVTF (32/32-bit)
2210
IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
2211
// UCVTF (32/32-bit)
2212
IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
2213
// SCVTF (64/32-bit)
2214
IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
2215
// UCVTF (64/32-bit)
2216
IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
2217
// SCVTF (32/64-bit)
2218
IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
2219
// UCVTF (32/64-bit)
2220
IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
2221
// SCVTF (64/64-bit)
2222
IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
2223
// UCVTF (64/64-bit)
2224
IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
2225
};
2226
sink.put4(enc_inttofpu(top16, rd, rn));
2227
}
2228
&Inst::FpuCSel16 { rd, rn, rm, cond } => {
2229
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16));
2230
}
2231
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
2232
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
2233
}
2234
&Inst::FpuCSel64 { rd, rn, rm, cond } => {
2235
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
2236
}
2237
&Inst::FpuRound { op, rd, rn } => {
2238
let top22 = match op {
2239
FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
2240
FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
2241
FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
2242
FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
2243
FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
2244
FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
2245
FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
2246
FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
2247
};
2248
sink.put4(enc_fround(top22, rd, rn));
2249
}
2250
&Inst::MovToFpu { rd, rn, size } => {
2251
let template = match size {
2252
ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000,
2253
ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
2254
ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
2255
_ => unreachable!(),
2256
};
2257
sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
2258
}
2259
&Inst::FpuMoveFPImm { rd, imm, size } => {
2260
sink.put4(
2261
0b000_11110_00_1_00_000_000100_00000_00000
2262
| size.ftype() << 22
2263
| ((imm.enc_bits() as u32) << 13)
2264
| machreg_to_vec(rd.to_reg()),
2265
);
2266
}
2267
&Inst::MovToVec {
2268
rd,
2269
ri,
2270
rn,
2271
idx,
2272
size,
2273
} => {
2274
debug_assert_eq!(rd.to_reg(), ri);
2275
let (imm5, shift) = match size.lane_size() {
2276
ScalarSize::Size8 => (0b00001, 1),
2277
ScalarSize::Size16 => (0b00010, 2),
2278
ScalarSize::Size32 => (0b00100, 3),
2279
ScalarSize::Size64 => (0b01000, 4),
2280
_ => unreachable!(),
2281
};
2282
debug_assert_eq!(idx & (0b11111 >> shift), idx);
2283
let imm5 = imm5 | ((idx as u32) << shift);
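// Worked example: inserting into a 16-bit lane at index 3 yields
// imm5 = 0b00010 | (3 << 2) = 0b01110, the INS (general) selector for Vd.H[3].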
2284
sink.put4(
2285
0b010_01110000_00000_0_0011_1_00000_00000
2286
| (imm5 << 16)
2287
| (machreg_to_gpr(rn) << 5)
2288
| machreg_to_vec(rd.to_reg()),
2289
);
2290
}
2291
&Inst::MovFromVec { rd, rn, idx, size } => {
2292
let (q, imm5, shift, mask) = match size {
2293
ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111),
2294
ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111),
2295
ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011),
2296
ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001),
2297
_ => panic!("Unexpected scalar FP operand size: {size:?}"),
2298
};
2299
debug_assert_eq!(idx & mask, idx);
2300
let imm5 = imm5 | ((idx as u32) << shift);
2301
sink.put4(
2302
0b000_01110000_00000_0_0111_1_00000_00000
2303
| (q << 30)
2304
| (imm5 << 16)
2305
| (machreg_to_vec(rn) << 5)
2306
| machreg_to_gpr(rd.to_reg()),
2307
);
2308
}
2309
&Inst::MovFromVecSigned {
2310
rd,
2311
rn,
2312
idx,
2313
size,
2314
scalar_size,
2315
} => {
2316
let (imm5, shift, half) = match size {
2317
VectorSize::Size8x8 => (0b00001, 1, true),
2318
VectorSize::Size8x16 => (0b00001, 1, false),
2319
VectorSize::Size16x4 => (0b00010, 2, true),
2320
VectorSize::Size16x8 => (0b00010, 2, false),
2321
VectorSize::Size32x2 => {
2322
debug_assert_ne!(scalar_size, OperandSize::Size32);
2323
(0b00100, 3, true)
2324
}
2325
VectorSize::Size32x4 => {
2326
debug_assert_ne!(scalar_size, OperandSize::Size32);
2327
(0b00100, 3, false)
2328
}
2329
_ => panic!("Unexpected vector operand size"),
2330
};
2331
debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
2332
let imm5 = imm5 | ((idx as u32) << shift);
2333
sink.put4(
2334
0b000_01110000_00000_0_0101_1_00000_00000
2335
| (scalar_size.is64() as u32) << 30
2336
| (imm5 << 16)
2337
| (machreg_to_vec(rn) << 5)
2338
| machreg_to_gpr(rd.to_reg()),
2339
);
2340
}
2341
&Inst::VecDup { rd, rn, size } => {
2342
let q = size.is_128bits() as u32;
2343
let imm5 = match size.lane_size() {
2344
ScalarSize::Size8 => 0b00001,
2345
ScalarSize::Size16 => 0b00010,
2346
ScalarSize::Size32 => 0b00100,
2347
ScalarSize::Size64 => 0b01000,
2348
_ => unreachable!(),
2349
};
2350
sink.put4(
2351
0b0_0_0_01110000_00000_000011_00000_00000
2352
| (q << 30)
2353
| (imm5 << 16)
2354
| (machreg_to_gpr(rn) << 5)
2355
| machreg_to_vec(rd.to_reg()),
2356
);
2357
}
2358
&Inst::VecDupFromFpu { rd, rn, size, lane } => {
2359
let q = size.is_128bits() as u32;
2360
let imm5 = match size.lane_size() {
2361
ScalarSize::Size8 => {
2362
assert!(lane < 16);
2363
0b00001 | (u32::from(lane) << 1)
2364
}
2365
ScalarSize::Size16 => {
2366
assert!(lane < 8);
2367
0b00010 | (u32::from(lane) << 2)
2368
}
2369
ScalarSize::Size32 => {
2370
assert!(lane < 4);
2371
0b00100 | (u32::from(lane) << 3)
2372
}
2373
ScalarSize::Size64 => {
2374
assert!(lane < 2);
2375
0b01000 | (u32::from(lane) << 4)
2376
}
2377
_ => unimplemented!(),
2378
};
2379
sink.put4(
2380
0b000_01110000_00000_000001_00000_00000
2381
| (q << 30)
2382
| (imm5 << 16)
2383
| (machreg_to_vec(rn) << 5)
2384
| machreg_to_vec(rd.to_reg()),
2385
);
2386
}
2387
&Inst::VecDupFPImm { rd, imm, size } => {
2388
let imm = imm.enc_bits();
2389
let op = match size.lane_size() {
2390
ScalarSize::Size32 => 0,
2391
ScalarSize::Size64 => 1,
2392
_ => unimplemented!(),
2393
};
2394
let q_op = op | ((size.is_128bits() as u32) << 1);
2395
2396
sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
2397
}
2398
&Inst::VecDupImm {
2399
rd,
2400
imm,
2401
invert,
2402
size,
2403
} => {
2404
let (imm, shift, shift_ones) = imm.value();
2405
let (op, cmode) = match size.lane_size() {
2406
ScalarSize::Size8 => {
2407
assert!(!invert);
2408
assert_eq!(shift, 0);
2409
2410
(0, 0b1110)
2411
}
2412
ScalarSize::Size16 => {
2413
let s = shift & 8;
2414
2415
assert!(!shift_ones);
2416
assert_eq!(s, shift);
2417
2418
(invert as u32, 0b1000 | (s >> 2))
2419
}
2420
ScalarSize::Size32 => {
2421
if shift_ones {
2422
assert!(shift == 8 || shift == 16);
2423
2424
(invert as u32, 0b1100 | (shift >> 4))
2425
} else {
2426
let s = shift & 24;
2427
2428
assert_eq!(s, shift);
2429
2430
(invert as u32, 0b0000 | (s >> 2))
2431
}
2432
}
2433
ScalarSize::Size64 => {
2434
assert!(!invert);
2435
assert_eq!(shift, 0);
2436
2437
(1, 0b1110)
2438
}
2439
_ => unreachable!(),
2440
};
2441
let q_op = op | ((size.is_128bits() as u32) << 1);
2442
2443
sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
2444
}
2445
&Inst::VecExtend {
2446
t,
2447
rd,
2448
rn,
2449
high_half,
2450
lane_size,
2451
} => {
2452
let immh = match lane_size {
2453
ScalarSize::Size16 => 0b001,
2454
ScalarSize::Size32 => 0b010,
2455
ScalarSize::Size64 => 0b100,
2456
_ => panic!("Unexpected VecExtend to lane size of {lane_size:?}"),
2457
};
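// Note (sketch): with immb left at zero this is SSHLL/USHLL by #0, i.e. the
// SXTL{2}/UXTL{2} aliases; immh above selects the destination lane size.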
2458
let u = match t {
2459
VecExtendOp::Sxtl => 0b0,
2460
VecExtendOp::Uxtl => 0b1,
2461
};
2462
sink.put4(
2463
0b000_011110_0000_000_101001_00000_00000
2464
| ((high_half as u32) << 30)
2465
| (u << 29)
2466
| (immh << 19)
2467
| (machreg_to_vec(rn) << 5)
2468
| machreg_to_vec(rd.to_reg()),
2469
);
2470
}
2471
&Inst::VecRRLong {
2472
op,
2473
rd,
2474
rn,
2475
high_half,
2476
} => {
2477
let (u, size, bits_12_16) = match op {
2478
VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
2479
VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
2480
VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
2481
VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
2482
VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
2483
};
2484
2485
sink.put4(enc_vec_rr_misc(
2486
((high_half as u32) << 1) | u,
2487
size,
2488
bits_12_16,
2489
rd,
2490
rn,
2491
));
2492
}
2493
&Inst::VecRRNarrowLow {
2494
op,
2495
rd,
2496
rn,
2497
lane_size,
2498
}
2499
| &Inst::VecRRNarrowHigh {
2500
op,
2501
rd,
2502
rn,
2503
lane_size,
2504
..
2505
} => {
2506
let high_half = match self {
2507
&Inst::VecRRNarrowLow { .. } => false,
2508
&Inst::VecRRNarrowHigh { .. } => true,
2509
_ => unreachable!(),
2510
};
2511
2512
let size = match lane_size {
2513
ScalarSize::Size8 => 0b00,
2514
ScalarSize::Size16 => 0b01,
2515
ScalarSize::Size32 => 0b10,
2516
_ => panic!("unsupported size: {lane_size:?}"),
2517
};
2518
2519
// Floats use a single bit to encode either a half- or single-precision destination.
2520
let size = match op {
2521
VecRRNarrowOp::Fcvtn => size >> 1,
2522
_ => size,
2523
};
2524
2525
let (u, bits_12_16) = match op {
2526
VecRRNarrowOp::Xtn => (0b0, 0b10010),
2527
VecRRNarrowOp::Sqxtn => (0b0, 0b10100),
2528
VecRRNarrowOp::Sqxtun => (0b1, 0b10010),
2529
VecRRNarrowOp::Uqxtn => (0b1, 0b10100),
2530
VecRRNarrowOp::Fcvtn => (0b0, 0b10110),
2531
};
2532
2533
sink.put4(enc_vec_rr_misc(
2534
((high_half as u32) << 1) | u,
2535
size,
2536
bits_12_16,
2537
rd,
2538
rn,
2539
));
2540
}
2541
&Inst::VecMovElement {
2542
rd,
2543
ri,
2544
rn,
2545
dest_idx,
2546
src_idx,
2547
size,
2548
} => {
2549
debug_assert_eq!(rd.to_reg(), ri);
2550
let (imm5, shift) = match size.lane_size() {
2551
ScalarSize::Size8 => (0b00001, 1),
2552
ScalarSize::Size16 => (0b00010, 2),
2553
ScalarSize::Size32 => (0b00100, 3),
2554
ScalarSize::Size64 => (0b01000, 4),
2555
_ => unreachable!(),
2556
};
2557
let mask = 0b11111 >> shift;
2558
debug_assert_eq!(dest_idx & mask, dest_idx);
2559
debug_assert_eq!(src_idx & mask, src_idx);
2560
let imm4 = (src_idx as u32) << (shift - 1);
2561
let imm5 = imm5 | ((dest_idx as u32) << shift);
2562
sink.put4(
2563
0b011_01110000_00000_0_0000_1_00000_00000
2564
| (imm5 << 16)
2565
| (imm4 << 11)
2566
| (machreg_to_vec(rn) << 5)
2567
| machreg_to_vec(rd.to_reg()),
2568
);
2569
}
2570
&Inst::VecRRPair { op, rd, rn } => {
2571
let bits_12_16 = match op {
2572
VecPairOp::Addp => 0b11011,
2573
};
2574
2575
sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
2576
}
2577
&Inst::VecRRRLong {
2578
rd,
2579
rn,
2580
rm,
2581
alu_op,
2582
high_half,
2583
} => {
2584
let (u, size, bit14) = match alu_op {
2585
VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
2586
VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
2587
VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
2588
VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
2589
VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
2590
VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
2591
};
2592
sink.put4(enc_vec_rrr_long(
2593
high_half as u32,
2594
u,
2595
size,
2596
bit14,
2597
rm,
2598
rn,
2599
rd,
2600
));
2601
}
2602
&Inst::VecRRRLongMod {
2603
rd,
2604
ri,
2605
rn,
2606
rm,
2607
alu_op,
2608
high_half,
2609
} => {
2610
debug_assert_eq!(rd.to_reg(), ri);
2611
let (u, size, bit14) = match alu_op {
2612
VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),
2613
VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),
2614
VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),
2615
};
2616
sink.put4(enc_vec_rrr_long(
2617
high_half as u32,
2618
u,
2619
size,
2620
bit14,
2621
rm,
2622
rn,
2623
rd,
2624
));
2625
}
2626
&Inst::VecRRPairLong { op, rd, rn } => {
2627
let (u, size) = match op {
2628
VecRRPairLongOp::Saddlp8 => (0b0, 0b0),
2629
VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),
2630
VecRRPairLongOp::Saddlp16 => (0b0, 0b1),
2631
VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),
2632
};
2633
2634
sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));
2635
}
2636
&Inst::VecRRR {
2637
rd,
2638
rn,
2639
rm,
2640
alu_op,
2641
size,
2642
} => {
2643
let (q, enc_size) = size.enc_size();
2644
let is_float = match alu_op {
2645
VecALUOp::Fcmeq
2646
| VecALUOp::Fcmgt
2647
| VecALUOp::Fcmge
2648
| VecALUOp::Fadd
2649
| VecALUOp::Fsub
2650
| VecALUOp::Fdiv
2651
| VecALUOp::Fmax
2652
| VecALUOp::Fmin
2653
| VecALUOp::Fmul => true,
2654
_ => false,
2655
};
2656
2657
let (top11, bit15_10) = match alu_op {
2658
VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
2659
VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
2660
VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
2661
VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
2662
VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
2663
VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
2664
VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
2665
VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
2666
VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
2667
VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
2668
VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
2669
VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
2670
// The following logical instructions operate on bytes, so are not encoded differently
2671
// for the different vector types.
2672
VecALUOp::And => (0b000_01110_00_1, 0b000111),
2673
VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
2674
VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
2675
VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
2676
VecALUOp::Umaxp => {
2677
debug_assert_ne!(size, VectorSize::Size64x2);
2678
2679
(0b001_01110_00_1 | enc_size << 1, 0b101001)
2680
}
2681
VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
2682
VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
2683
VecALUOp::Mul => {
2684
debug_assert_ne!(size, VectorSize::Size64x2);
2685
(0b000_01110_00_1 | enc_size << 1, 0b100111)
2686
}
2687
VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
2688
VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
2689
VecALUOp::Umin => {
2690
debug_assert_ne!(size, VectorSize::Size64x2);
2691
2692
(0b001_01110_00_1 | enc_size << 1, 0b011011)
2693
}
2694
VecALUOp::Smin => {
2695
debug_assert_ne!(size, VectorSize::Size64x2);
2696
2697
(0b000_01110_00_1 | enc_size << 1, 0b011011)
2698
}
2699
VecALUOp::Umax => {
2700
debug_assert_ne!(size, VectorSize::Size64x2);
2701
2702
(0b001_01110_00_1 | enc_size << 1, 0b011001)
2703
}
2704
VecALUOp::Smax => {
2705
debug_assert_ne!(size, VectorSize::Size64x2);
2706
2707
(0b000_01110_00_1 | enc_size << 1, 0b011001)
2708
}
2709
VecALUOp::Urhadd => {
2710
debug_assert_ne!(size, VectorSize::Size64x2);
2711
2712
(0b001_01110_00_1 | enc_size << 1, 0b000101)
2713
}
2714
VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
2715
VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
2716
VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
2717
VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
2718
VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
2719
VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
2720
VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
2721
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
2722
VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110),
2723
VecALUOp::Sqrdmulh => {
2724
debug_assert!(
2725
size.lane_size() == ScalarSize::Size16
2726
|| size.lane_size() == ScalarSize::Size32
2727
);
2728
2729
(0b001_01110_00_1 | enc_size << 1, 0b101101)
2730
}
2731
VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110),
2732
VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110),
2733
VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010),
2734
VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010),
2735
};
2736
let top11 = if is_float {
2737
top11 | size.enc_float_size() << 1
2738
} else {
2739
top11
2740
};
2741
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
2742
}
2743
&Inst::VecRRRMod {
2744
rd,
2745
ri,
2746
rn,
2747
rm,
2748
alu_op,
2749
size,
2750
} => {
2751
debug_assert_eq!(rd.to_reg(), ri);
2752
let (q, _enc_size) = size.enc_size();
2753
2754
let (top11, bit15_10) = match alu_op {
2755
VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),
2756
VecALUModOp::Fmla => {
2757
(0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
2758
}
2759
VecALUModOp::Fmls => {
2760
(0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011)
2761
}
2762
};
2763
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
2764
}
2765
&Inst::VecFmlaElem {
2766
rd,
2767
ri,
2768
rn,
2769
rm,
2770
alu_op,
2771
size,
2772
idx,
2773
} => {
2774
debug_assert_eq!(rd.to_reg(), ri);
2775
let idx = u32::from(idx);
2776
2777
let (q, _size) = size.enc_size();
2778
let o2 = match alu_op {
2779
VecALUModOp::Fmla => 0b0,
2780
VecALUModOp::Fmls => 0b1,
2781
_ => unreachable!(),
2782
};
2783
2784
let (h, l) = match size {
2785
VectorSize::Size32x4 => {
2786
assert!(idx < 4);
2787
(idx >> 1, idx & 1)
2788
}
2789
VectorSize::Size64x2 => {
2790
assert!(idx < 2);
2791
(idx, 0)
2792
}
2793
_ => unreachable!(),
2794
};
2795
2796
let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l;
2797
let bit15_10 = 0b000100 | (o2 << 4) | (h << 1);
2798
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
2799
}
2800
&Inst::VecLoadReplicate {
2801
rd,
2802
rn,
2803
size,
2804
flags,
2805
} => {
2806
let (q, size) = size.enc_size();
2807
2808
if let Some(trap_code) = flags.trap_code() {
2809
// Register the offset at which the actual load instruction starts.
2810
sink.add_trap(trap_code);
2811
}
2812
2813
sink.put4(enc_ldst_vec(q, size, rn, rd));
2814
}
2815
&Inst::VecCSel { rd, rn, rm, cond } => {
2816
/* Emit this:
2817
b.cond else
2818
mov rd, rm
2819
b out
2820
else:
2821
mov rd, rn
2822
out:
2823
2824
Note, we could do better in the cases where rd == rn or rd == rm.
2825
*/
2826
let else_label = sink.get_label();
2827
let out_label = sink.get_label();
2828
2829
// b.cond else
2830
let br_else_offset = sink.cur_offset();
2831
sink.put4(enc_conditional_br(
2832
BranchTarget::Label(else_label),
2833
CondBrKind::Cond(cond),
2834
));
2835
sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);
2836
2837
// mov rd, rm
2838
sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));
2839
2840
// b out
2841
let b_out_offset = sink.cur_offset();
2842
sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
2843
sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
2844
sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));
2845
2846
// else:
2847
sink.bind_label(else_label, &mut state.ctrl_plane);
2848
2849
// mov rd, rn
2850
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
2851
2852
// out:
2853
sink.bind_label(out_label, &mut state.ctrl_plane);
2854
}
2855
&Inst::MovToNZCV { rn } => {
2856
sink.put4(0xd51b4200 | machreg_to_gpr(rn));
2857
}
2858
&Inst::MovFromNZCV { rd } => {
2859
sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
2860
}
2861
&Inst::Extend {
2862
rd,
2863
rn,
2864
signed: false,
2865
from_bits: 1,
2866
to_bits,
2867
} => {
2868
assert!(to_bits <= 64);
2869
// Reduce zero-extend-from-1-bit to:
2870
// - and rd, rn, #1
2871
// Note: This is special cased as UBFX may take more cycles
2872
// than AND on smaller cores.
2873
let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
2874
Inst::AluRRImmLogic {
2875
alu_op: ALUOp::And,
2876
size: OperandSize::Size32,
2877
rd,
2878
rn,
2879
imml,
2880
}
2881
.emit(sink, emit_info, state);
2882
}
2883
&Inst::Extend {
2884
rd,
2885
rn,
2886
signed: false,
2887
from_bits: 32,
2888
to_bits: 64,
2889
} => {
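// A 32-bit register move zero-extends implicitly: writing a W register
// clears the upper 32 bits of the X register, so a plain 32-bit MOV suffices.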
2890
let mov = Inst::Mov {
2891
size: OperandSize::Size32,
2892
rd,
2893
rm: rn,
2894
};
2895
mov.emit(sink, emit_info, state);
2896
}
2897
&Inst::Extend {
2898
rd,
2899
rn,
2900
signed,
2901
from_bits,
2902
to_bits,
2903
} => {
2904
let (opc, size) = if signed {
2905
(0b00, OperandSize::from_bits(to_bits))
2906
} else {
2907
(0b10, OperandSize::Size32)
2908
};
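// Sketch: this lowers to SBFM/UBFM with immr=0 and imms=from_bits-1. For
// example, a signed i8-to-i64 extend becomes `sbfm xd, xn, #0, #7` (the SXTB
// alias), while unsigned extends use the 32-bit UBFM form (UXTB/UXTH).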
2909
sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
2910
}
2911
&Inst::Jump { ref dest } => {
2912
let off = sink.cur_offset();
2913
// Indicate that the jump uses a label, if so, so that a fixup can occur later.
2914
if let Some(l) = dest.as_label() {
2915
sink.use_label_at_offset(off, l, LabelUse::Branch26);
2916
sink.add_uncond_branch(off, off + 4, l);
2917
}
2918
// Emit the jump itself.
2919
sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
2920
}
2921
&Inst::Args { .. } | &Inst::Rets { .. } => {
2922
// Nothing: this is a pseudoinstruction that serves
2923
// only to constrain registers at a certain point.
2924
}
2925
&Inst::Ret {} => {
2926
sink.put4(0xd65f03c0);
2927
}
2928
&Inst::AuthenticatedRet { key, is_hint } => {
2929
let (op2, is_hint) = match key {
2930
APIKey::AZ => (0b100, true),
2931
APIKey::ASP => (0b101, is_hint),
2932
APIKey::BZ => (0b110, true),
2933
APIKey::BSP => (0b111, is_hint),
2934
};
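// Note: the zero-modifier keys (AZ/BZ) have no RETA* form, so they always
// take the hint path (AUTIxZ from the HINT space followed by a plain RET);
// ASP/BSP can use the combined RETAA/RETAB encoding directly.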
2935
2936
if is_hint {
2937
sink.put4(key.enc_auti_hint());
2938
Inst::Ret {}.emit(sink, emit_info, state);
2939
} else {
2940
sink.put4(0xd65f0bff | (op2 << 9)); // reta{key}
2941
}
2942
}
2943
&Inst::Call { ref info } => {
2944
let user_stack_map = state.take_stack_map();
2945
sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
2946
sink.put4(enc_jump26(0b100101, 0));
2947
if let Some(s) = user_stack_map {
2948
let offset = sink.cur_offset();
2949
sink.push_user_stack_map(state, offset, s);
2950
}
2951
2952
if let Some(try_call) = info.try_call_info.as_ref() {
2953
sink.add_try_call_site(
2954
Some(state.frame_layout.sp_to_fp()),
2955
try_call.exception_handlers(&state.frame_layout),
2956
);
2957
} else {
2958
sink.add_call_site();
2959
}
2960
2961
if info.callee_pop_size > 0 {
2962
let callee_pop_size =
2963
i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
2964
for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
2965
inst.emit(sink, emit_info, state);
2966
}
2967
}
2968
2969
// Load any stack-carried return values.
2970
info.emit_retval_loads::<AArch64MachineDeps, _, _>(
2971
state.frame_layout().stackslots_size,
2972
|inst| inst.emit(sink, emit_info, state),
2973
|needed_space| Some(Inst::EmitIsland { needed_space }),
2974
);
2975
2976
// If this is a try-call, jump to the continuation
2977
// (normal-return) block.
2978
if let Some(try_call) = info.try_call_info.as_ref() {
2979
let jmp = Inst::Jump {
2980
dest: BranchTarget::Label(try_call.continuation),
2981
};
2982
jmp.emit(sink, emit_info, state);
2983
}
2984
2985
// We produce an island above if needed, so disable
2986
// the worst-case-size check in this case.
2987
start_off = sink.cur_offset();
2988
}
2989
&Inst::CallInd { ref info } => {
2990
let user_stack_map = state.take_stack_map();
2991
sink.put4(
2992
0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.dest) << 5),
2993
);
2994
if let Some(s) = user_stack_map {
2995
let offset = sink.cur_offset();
2996
sink.push_user_stack_map(state, offset, s);
2997
}
2998
2999
if let Some(try_call) = info.try_call_info.as_ref() {
3000
sink.add_try_call_site(
3001
Some(state.frame_layout.sp_to_fp()),
3002
try_call.exception_handlers(&state.frame_layout),
3003
);
3004
} else {
3005
sink.add_call_site();
3006
}
3007
3008
if info.callee_pop_size > 0 {
3009
let callee_pop_size =
3010
i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
3011
for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
3012
inst.emit(sink, emit_info, state);
3013
}
3014
}
3015
3016
// Load any stack-carried return values.
3017
info.emit_retval_loads::<AArch64MachineDeps, _, _>(
3018
state.frame_layout().stackslots_size,
3019
|inst| inst.emit(sink, emit_info, state),
3020
|needed_space| Some(Inst::EmitIsland { needed_space }),
3021
);
3022
3023
// If this is a try-call, jump to the continuation
3024
// (normal-return) block.
3025
if let Some(try_call) = info.try_call_info.as_ref() {
3026
let jmp = Inst::Jump {
3027
dest: BranchTarget::Label(try_call.continuation),
3028
};
3029
jmp.emit(sink, emit_info, state);
3030
}
3031
3032
// We produce an island above if needed, so disable
3033
// the worst-case-size check in this case.
3034
start_off = sink.cur_offset();
3035
}
3036
&Inst::ReturnCall { ref info } => {
3037
emit_return_call_common_sequence(sink, emit_info, state, info);
3038
3039
// Note: this is not `Inst::Jump { .. }.emit(..)` because we
3040
// have different metadata in this case: we don't have a label
3041
// for the target, but rather a function relocation.
3042
sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
3043
sink.put4(enc_jump26(0b000101, 0));
3044
sink.add_call_site();
3045
3046
// `emit_return_call_common_sequence` emits an island if
3047
// necessary, so we can safely disable the worst-case-size check
3048
// in this case.
3049
start_off = sink.cur_offset();
3050
}
3051
&Inst::ReturnCallInd { ref info } => {
3052
emit_return_call_common_sequence(sink, emit_info, state, info);
3053
3054
Inst::IndirectBr {
3055
rn: info.dest,
3056
targets: vec![],
3057
}
3058
.emit(sink, emit_info, state);
3059
sink.add_call_site();
3060
3061
// `emit_return_call_common_sequence` emits an island if
3062
// necessary, so we can safely disable the worst-case-size check
3063
// in this case.
3064
start_off = sink.cur_offset();
3065
}
3066
&Inst::CondBr {
3067
taken,
3068
not_taken,
3069
kind,
3070
} => {
3071
// Conditional part first.
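// We also record the inverted encoding so the MachBuffer's branch-folding
// pass can flip this branch if the taken target ends up falling through.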
3072
let cond_off = sink.cur_offset();
3073
if let Some(l) = taken.as_label() {
3074
sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
3075
let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
3076
sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
3077
}
3078
sink.put4(enc_conditional_br(taken, kind));
3079
3080
// Unconditional part next.
3081
let uncond_off = sink.cur_offset();
3082
if let Some(l) = not_taken.as_label() {
3083
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
3084
sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
3085
}
3086
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
3087
}
3088
&Inst::TestBitAndBranch {
3089
taken,
3090
not_taken,
3091
kind,
3092
rn,
3093
bit,
3094
} => {
3095
// Emit the conditional branch first
3096
let cond_off = sink.cur_offset();
3097
if let Some(l) = taken.as_label() {
3098
sink.use_label_at_offset(cond_off, l, LabelUse::Branch14);
3099
let inverted =
3100
enc_test_bit_and_branch(kind.complement(), taken, rn, bit).to_le_bytes();
3101
sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
3102
}
3103
sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit));
3104
3105
// Unconditional part next.
3106
let uncond_off = sink.cur_offset();
3107
if let Some(l) = not_taken.as_label() {
3108
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
3109
sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
3110
}
3111
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
3112
}
3113
&Inst::TrapIf { kind, trap_code } => {
3114
let label = sink.defer_trap(trap_code);
3115
// condbr KIND, LABEL
3116
let off = sink.cur_offset();
3117
sink.put4(enc_conditional_br(BranchTarget::Label(label), kind));
3118
sink.use_label_at_offset(off, label, LabelUse::Branch19);
3119
}
3120
&Inst::IndirectBr { rn, .. } => {
3121
sink.put4(enc_br(rn));
3122
}
3123
&Inst::Nop0 => {}
3124
&Inst::Nop4 => {
3125
sink.put4(0xd503201f);
3126
}
3127
&Inst::Brk => {
3128
sink.put4(0xd43e0000);
3129
}
3130
&Inst::Udf { trap_code } => {
3131
sink.add_trap(trap_code);
3132
sink.put_data(Inst::TRAP_OPCODE);
3133
}
3134
&Inst::Adr { rd, off } => {
3135
assert!(off > -(1 << 20));
3136
assert!(off < (1 << 20));
3137
sink.put4(enc_adr(off, rd));
3138
}
3139
&Inst::Adrp { rd, off } => {
3140
assert!(off > -(1 << 20));
3141
assert!(off < (1 << 20));
3142
sink.put4(enc_adrp(off, rd));
3143
}
3144
&Inst::Word4 { data } => {
3145
sink.put4(data);
3146
}
3147
&Inst::Word8 { data } => {
3148
sink.put8(data);
3149
}
3150
&Inst::JTSequence {
3151
ridx,
3152
rtmp1,
3153
rtmp2,
3154
default,
3155
ref targets,
3156
..
3157
} => {
3158
// This sequence is *one* instruction in the vcode, and is expanded only here at
3159
// emission time, because we cannot allow the regalloc to insert spills/reloads in
3160
// the middle; we depend on hardcoded PC-rel addressing below.
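// Schematically, the sequence emitted below is (a sketch, register names
// symbolic):
//   b.hs default
//   csel rtmp2, xzr, ridx, hs
//   csdb
//   adr rtmp1, <jump table>          ; PC + 16 lands on the table
//   ldrsw rtmp2, [rtmp1, wtmp2, uxtw #2]
//   add rtmp1, rtmp1, rtmp2
//   br rtmp1
//   <table of 32-bit offsets, one per target>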
3161
3162
// Branch to default when condition code from prior comparison indicates.
3163
let br =
3164
enc_conditional_br(BranchTarget::Label(default), CondBrKind::Cond(Cond::Hs));
3165
3166
// No need to inform the sink's branch folding logic about this branch, because it
3167
// will not be merged with any other branch, flipped, or elided (it is not preceded
3168
// or succeeded by any other branch). Just emit it with the label use.
3169
let default_br_offset = sink.cur_offset();
3170
sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19);
3171
sink.put4(br);
3172
3173
// Overwrite the index with a zero when the above
3174
// branch misspeculates (Spectre mitigation). Save the
3175
// resulting index in rtmp2.
3176
let inst = Inst::CSel {
3177
rd: rtmp2,
3178
cond: Cond::Hs,
3179
rn: zero_reg(),
3180
rm: ridx,
3181
};
3182
inst.emit(sink, emit_info, state);
3183
// Prevent any data value speculation.
3184
Inst::Csdb.emit(sink, emit_info, state);
3185
3186
// Load address of jump table
3187
let inst = Inst::Adr { rd: rtmp1, off: 16 };
3188
inst.emit(sink, emit_info, state);
3189
// Load value out of jump table
3190
let inst = Inst::SLoad32 {
3191
rd: rtmp2,
3192
mem: AMode::reg_plus_reg_scaled_extended(
3193
rtmp1.to_reg(),
3194
rtmp2.to_reg(),
3195
ExtendOp::UXTW,
3196
),
3197
flags: MemFlags::trusted(),
3198
};
3199
inst.emit(sink, emit_info, state);
3200
// Add base of jump table to jump-table-sourced block offset
3201
let inst = Inst::AluRRR {
3202
alu_op: ALUOp::Add,
3203
size: OperandSize::Size64,
3204
rd: rtmp1,
3205
rn: rtmp1.to_reg(),
3206
rm: rtmp2.to_reg(),
3207
};
3208
inst.emit(sink, emit_info, state);
3209
// Branch to computed address. (`targets` here is only used for successor queries
3210
// and is not needed for emission.)
3211
let inst = Inst::IndirectBr {
3212
rn: rtmp1.to_reg(),
3213
targets: vec![],
3214
};
3215
inst.emit(sink, emit_info, state);
3216
// Emit jump table (table of 32-bit offsets).
3217
let jt_off = sink.cur_offset();
3218
for &target in targets.iter() {
3219
let word_off = sink.cur_offset();
3220
// off_into_table is an addend here embedded in the label to be later patched
3221
// at the end of codegen. The offset is initially relative to this jump table
3222
// entry; with the extra addend, it'll be relative to the jump table's start,
3223
// after patching.
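// Worked example: the third entry sits at word_off = jt_off + 8, so it
// initially holds 8; the PCRel32 fixup later adds (target - word_off),
// leaving (target - jt_off), an offset from the table's start.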
3224
let off_into_table = word_off - jt_off;
3225
sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);
3226
sink.put4(off_into_table);
3227
}
3228
3229
// Lowering produces an EmitIsland before using a JTSequence, so we can safely
3230
// disable the worst-case-size check in this case.
3231
start_off = sink.cur_offset();
3232
}
3233
&Inst::LoadExtNameGot { rd, ref name } => {
3234
// See this CE Example for the variations of this with and without BTI & PAUTH
3235
// https://godbolt.org/z/ncqjbbvvn
3236
//
3237
// Emit the following code:
3238
// adrp rd, :got:X
3239
// ldr rd, [rd, :got_lo12:X]
3240
3241
// adrp rd, symbol
3242
sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0);
3243
let inst = Inst::Adrp { rd, off: 0 };
3244
inst.emit(sink, emit_info, state);
3245
3246
// ldr rd, [rd, :got_lo12:X]
3247
sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0);
3248
let inst = Inst::ULoad64 {
3249
rd,
3250
mem: AMode::reg(rd.to_reg()),
3251
flags: MemFlags::trusted(),
3252
};
3253
inst.emit(sink, emit_info, state);
3254
}
3255
&Inst::LoadExtNameNear {
3256
rd,
3257
ref name,
3258
offset,
3259
} => {
3260
// Emit the following code:
3261
// adrp rd, X
3262
// add rd, rd, :lo12:X
3263
//
3264
// See https://godbolt.org/z/855KEvM5r for an example.
3265
3266
// adrp rd, symbol
3267
sink.add_reloc(Reloc::Aarch64AdrPrelPgHi21, &**name, offset);
3268
let inst = Inst::Adrp { rd, off: 0 };
3269
inst.emit(sink, emit_info, state);
3270
3271
// add rd, rd, :lo12:X
3272
sink.add_reloc(Reloc::Aarch64AddAbsLo12Nc, &**name, offset);
3273
let inst = Inst::AluRRImm12 {
3274
alu_op: ALUOp::Add,
3275
size: OperandSize::Size64,
3276
rd,
3277
rn: rd.to_reg(),
3278
imm12: Imm12::ZERO,
3279
};
3280
inst.emit(sink, emit_info, state);
3281
}
3282
&Inst::LoadExtNameFar {
3283
rd,
3284
ref name,
3285
offset,
3286
} => {
3287
// With absolute offsets we set up a load from a preallocated space, and then jump
3288
// over it.
3289
//
3290
// Emit the following code:
3291
// ldr rd, #8
3292
// b #0x10
3293
// <8 byte space>
3294
3295
let inst = Inst::ULoad64 {
3296
rd,
3297
mem: AMode::Label {
3298
label: MemLabel::PCRel(8),
3299
},
3300
flags: MemFlags::trusted(),
3301
};
3302
inst.emit(sink, emit_info, state);
3303
let inst = Inst::Jump {
3304
dest: BranchTarget::ResolvedOffset(12),
3305
};
3306
inst.emit(sink, emit_info, state);
3307
sink.add_reloc(Reloc::Abs8, &**name, offset);
3308
sink.put8(0);
3309
}
3310
&Inst::LoadAddr { rd, ref mem } => {
3311
let mem = mem.clone();
3312
let (mem_insts, mem) = mem_finalize(Some(sink), &mem, I8, state);
3313
for inst in mem_insts.into_iter() {
3314
inst.emit(sink, emit_info, state);
3315
}
3316
3317
let (reg, index_reg, offset) = match mem {
3318
AMode::RegExtended { rn, rm, extendop } => {
3319
let r = rn;
3320
(r, Some((rm, extendop)), 0)
3321
}
3322
AMode::Unscaled { rn, simm9 } => {
3323
let r = rn;
3324
(r, None, simm9.value())
3325
}
3326
AMode::UnsignedOffset { rn, uimm12 } => {
3327
let r = rn;
3328
(r, None, uimm12.value() as i32)
3329
}
3330
_ => panic!("Unsupported case for LoadAddr: {mem:?}"),
3331
};
3332
let abs_offset = if offset < 0 {
3333
-offset as u64
3334
} else {
3335
offset as u64
3336
};
3337
let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };
3338
3339
if let Some((idx, extendop)) = index_reg {
3340
let add = Inst::AluRRRExtend {
3341
alu_op: ALUOp::Add,
3342
size: OperandSize::Size64,
3343
rd,
3344
rn: reg,
3345
rm: idx,
3346
extendop,
3347
};
3348
3349
add.emit(sink, emit_info, state);
3350
} else if offset == 0 {
3351
if reg != rd.to_reg() {
3352
let mov = Inst::Mov {
3353
size: OperandSize::Size64,
3354
rd,
3355
rm: reg,
3356
};
3357
3358
mov.emit(sink, emit_info, state);
3359
}
3360
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
3361
let add = Inst::AluRRImm12 {
3362
alu_op,
3363
size: OperandSize::Size64,
3364
rd,
3365
rn: reg,
3366
imm12,
3367
};
3368
add.emit(sink, emit_info, state);
3369
} else {
3370
// Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
3371
// was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
3372
// that no other instructions will be inserted here (we're emitting directly),
3373
// and a live range of `tmp2` should not span this instruction, so this use
3374
// should otherwise be correct.
3375
debug_assert!(rd.to_reg() != tmp2_reg());
3376
debug_assert!(reg != tmp2_reg());
3377
let tmp = writable_tmp2_reg();
3378
for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
3379
insn.emit(sink, emit_info, state);
3380
}
3381
let add = Inst::AluRRR {
3382
alu_op,
3383
size: OperandSize::Size64,
3384
rd,
3385
rn: reg,
3386
rm: tmp.to_reg(),
3387
};
3388
add.emit(sink, emit_info, state);
3389
}
3390
}
3391
&Inst::Paci { key } => {
3392
let (crm, op2) = match key {
3393
APIKey::AZ => (0b0011, 0b000),
3394
APIKey::ASP => (0b0011, 0b001),
3395
APIKey::BZ => (0b0011, 0b010),
3396
APIKey::BSP => (0b0011, 0b011),
3397
};
3398
3399
sink.put4(0xd503211f | (crm << 8) | (op2 << 5));
3400
}
3401
&Inst::Xpaclri => sink.put4(0xd50320ff),
3402
&Inst::Bti { targets } => {
3403
let targets = match targets {
3404
BranchTargetType::None => 0b00,
3405
BranchTargetType::C => 0b01,
3406
BranchTargetType::J => 0b10,
3407
BranchTargetType::JC => 0b11,
3408
};
3409
3410
sink.put4(0xd503241f | targets << 6);
3411
}
3412
&Inst::EmitIsland { needed_space } => {
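// Note: the `+ 4` presumably accounts for the 4-byte jump-around branch
// emitted just below.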
3413
if sink.island_needed(needed_space + 4) {
3414
let jump_around_label = sink.get_label();
3415
let jmp = Inst::Jump {
3416
dest: BranchTarget::Label(jump_around_label),
3417
};
3418
jmp.emit(sink, emit_info, state);
3419
sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
3420
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
3421
}
3422
}
3423
3424
&Inst::ElfTlsGetAddr {
3425
ref symbol,
3426
rd,
3427
tmp,
3428
} => {
3429
assert_eq!(xreg(0), rd.to_reg());
3430
3431
// See the original proposal for TLSDESC.
3432
// http://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
3433
//
3434
// Implement the TLSDESC instruction sequence:
3435
// adrp x0, :tlsdesc:tlsvar
3436
// ldr tmp, [x0, :tlsdesc_lo12:tlsvar]
3437
// add x0, x0, :tlsdesc_lo12:tlsvar
3438
// blr tmp
3439
// mrs tmp, tpidr_el0
3440
// add x0, x0, tmp
3441
//
3442
// This is the instruction sequence that GCC emits for ELF GD TLS Relocations in aarch64
3443
// See: https://gcc.godbolt.org/z/e4j7MdErh
3444
3445
// adrp x0, :tlsdesc:tlsvar
3446
sink.add_reloc(Reloc::Aarch64TlsDescAdrPage21, &**symbol, 0);
3447
Inst::Adrp { rd, off: 0 }.emit(sink, emit_info, state);
3448
3449
// ldr tmp, [x0, :tlsdesc_lo12:tlsvar]
3450
sink.add_reloc(Reloc::Aarch64TlsDescLd64Lo12, &**symbol, 0);
3451
Inst::ULoad64 {
3452
rd: tmp,
3453
mem: AMode::reg(rd.to_reg()),
3454
flags: MemFlags::trusted(),
3455
}
3456
.emit(sink, emit_info, state);
3457
3458
// add x0, x0, :tlsdesc_lo12:tlsvar
3459
sink.add_reloc(Reloc::Aarch64TlsDescAddLo12, &**symbol, 0);
3460
Inst::AluRRImm12 {
3461
alu_op: ALUOp::Add,
3462
size: OperandSize::Size64,
3463
rd,
3464
rn: rd.to_reg(),
3465
imm12: Imm12::maybe_from_u64(0).unwrap(),
3466
}
3467
.emit(sink, emit_info, state);
3468
3469
// blr tmp
3470
sink.add_reloc(Reloc::Aarch64TlsDescCall, &**symbol, 0);
3471
Inst::CallInd {
3472
info: crate::isa::Box::new(CallInfo::empty(tmp.to_reg(), CallConv::SystemV)),
3473
}
3474
.emit(sink, emit_info, state);
3475
3476
// mrs tmp, tpidr_el0
3477
sink.put4(0xd53bd040 | machreg_to_gpr(tmp.to_reg()));
3478
3479
// add x0, x0, tmp
3480
Inst::AluRRR {
3481
alu_op: ALUOp::Add,
3482
size: OperandSize::Size64,
3483
rd,
3484
rn: rd.to_reg(),
3485
rm: tmp.to_reg(),
3486
}
3487
.emit(sink, emit_info, state);
3488
}
3489
3490
&Inst::MachOTlsGetAddr { ref symbol, rd } => {
3491
// Each thread local variable gets a descriptor, where the first xword of the descriptor is a pointer
3492
// to a function that takes the descriptor address in x0, and after the function returns x0
3493
// contains the address for the thread local variable
3494
//
3495
// what we want to emit is basically:
3496
//
3497
// adrp x0, <label>@TLVPPAGE ; Load the address of the page of the thread local variable pointer (TLVP)
3498
// ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0
3499
// ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)
3500
// blr x1 ; Call the function pointer with the descriptor address in x0
3501
// ; x0 now contains the TLV address
3502
3503
assert_eq!(xreg(0), rd.to_reg());
3504
let rtmp = writable_xreg(1);
3505
3506
// adrp x0, <label>@TLVPPAGE
3507
sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);
3508
sink.put4(0x90000000);
3509
3510
// ldr x0, [x0, <label>@TLVPPAGEOFF]
3511
sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);
3512
sink.put4(0xf9400000);
3513
3514
// load [x0] into temp register
3515
Inst::ULoad64 {
3516
rd: rtmp,
3517
mem: AMode::reg(rd.to_reg()),
3518
flags: MemFlags::trusted(),
3519
}
3520
.emit(sink, emit_info, state);
3521
3522
// call function pointer in temp register
3523
Inst::CallInd {
3524
info: crate::isa::Box::new(CallInfo::empty(
3525
rtmp.to_reg(),
3526
CallConv::AppleAarch64,
3527
)),
3528
}
3529
.emit(sink, emit_info, state);
3530
}
3531
3532
&Inst::Unwind { ref inst } => {
3533
sink.add_unwind(inst.clone());
3534
}
3535
3536
&Inst::DummyUse { .. } => {}
3537
3538
&Inst::LabelAddress { dst, label } => {
3539
// We emit an ADR only, which has a +/- 1MiB range. This
3540
// should be sufficient for the typical use-case of
3541
// this instruction, which is in small trampolines to
3542
// get exception-handler addresses.
3543
let inst = Inst::Adr { rd: dst, off: 0 };
3544
let offset = sink.cur_offset();
3545
inst.emit(sink, emit_info, state);
3546
sink.use_label_at_offset(offset, label, LabelUse::Adr21);
3547
}
3548
3549
&Inst::StackProbeLoop { start, end, step } => {
3550
assert!(emit_info.0.enable_probestack());
3551
3552
// The loop generated here uses `start` as a counter register to
3553
// count backwards until negating it exceeds `end`. In other
3554
// words `start` is an offset from `sp` we're testing where
3555
// `end` is the max size we need to test. The loop looks like:
3556
//
3557
// loop_start:
3558
// sub start, start, #step
3559
// str wzr, [sp, start]
3560
// cmn start, end
3561
// b.gt loop_start
3562
// loop_end:
3563
//
3564
// Note that this loop cannot use the spilltmp and tmp2
3565
// registers as those are currently used as the input to this
3566
// loop when generating the instruction. This means that some
3567
// more flavorful address modes and lowerings need to be
3568
// avoided.
3569
//
3570
// Perhaps someone more clever than I can figure out how to use
3571
// `subs` or the like and skip the `cmn`, but I can't figure it
3572
// out at this time.
3573
3574
let loop_start = sink.get_label();
3575
sink.bind_label(loop_start, &mut state.ctrl_plane);
3576
3577
Inst::AluRRImm12 {
3578
alu_op: ALUOp::Sub,
3579
size: OperandSize::Size64,
3580
rd: start,
3581
rn: start.to_reg(),
3582
imm12: step,
3583
}
3584
.emit(sink, emit_info, state);
3585
Inst::Store32 {
3586
rd: regs::zero_reg(),
3587
mem: AMode::RegReg {
3588
rn: regs::stack_reg(),
3589
rm: start.to_reg(),
3590
},
3591
flags: MemFlags::trusted(),
3592
}
3593
.emit(sink, emit_info, state);
3594
Inst::AluRRR {
3595
alu_op: ALUOp::AddS,
3596
size: OperandSize::Size64,
3597
rd: regs::writable_zero_reg(),
3598
rn: start.to_reg(),
3599
rm: end,
3600
}
3601
.emit(sink, emit_info, state);
3602
3603
let loop_end = sink.get_label();
3604
Inst::CondBr {
3605
taken: BranchTarget::Label(loop_start),
3606
not_taken: BranchTarget::Label(loop_end),
3607
kind: CondBrKind::Cond(Cond::Gt),
3608
}
3609
.emit(sink, emit_info, state);
3610
sink.bind_label(loop_end, &mut state.ctrl_plane);
3611
}
3612
}
3613
3614
let end_off = sink.cur_offset();
3615
debug_assert!(
3616
(end_off - start_off) <= Inst::worst_case_size()
3617
|| matches!(self, Inst::EmitIsland { .. }),
3618
"Worst case size exceed for {:?}: {}",
3619
self,
3620
end_off - start_off
3621
);
3622
3623
state.clear_post_insn();
3624
}
3625
3626
fn pretty_print_inst(&self, state: &mut Self::State) -> String {
3627
self.print_with_state(state)
3628
}
3629
}
3630
3631
fn emit_return_call_common_sequence<T>(
3632
sink: &mut MachBuffer<Inst>,
3633
emit_info: &EmitInfo,
3634
state: &mut EmitState,
3635
info: &ReturnCallInfo<T>,
3636
) {
3637
for inst in
3638
AArch64MachineDeps::gen_clobber_restore(CallConv::Tail, &emit_info.0, state.frame_layout())
3639
{
3640
inst.emit(sink, emit_info, state);
3641
}
3642
3643
let setup_area_size = state.frame_layout().setup_area_size;
3644
if setup_area_size > 0 {
3645
// N.B.: sp is already adjusted to the appropriate place by the
3646
// clobber-restore code (which also frees the fixed frame). Hence, there
3647
// is no need for the usual `mov sp, fp` here.
3648
3649
// `ldp fp, lr, [sp], #16`
3650
Inst::LoadP64 {
3651
rt: writable_fp_reg(),
3652
rt2: writable_link_reg(),
3653
mem: PairAMode::SPPostIndexed {
3654
// TODO: we could fold the increment for incoming_args_diff here, as long as that
3655
// value is less than 502*8, by adding it to `setup_area_size`.
3656
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
3657
simm7: SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64).unwrap(),
3658
},
3659
flags: MemFlags::trusted(),
3660
}
3661
.emit(sink, emit_info, state);
3662
}
3663
3664
// Adjust SP to account for the possible over-allocation in the prologue.
3665
let incoming_args_diff = state.frame_layout().tail_args_size - info.new_stack_arg_size;
3666
if incoming_args_diff > 0 {
3667
for inst in
3668
AArch64MachineDeps::gen_sp_reg_adjust(i32::try_from(incoming_args_diff).unwrap())
3669
{
3670
inst.emit(sink, emit_info, state);
3671
}
3672
}
3673
3674
if let Some(key) = info.key {
3675
sink.put4(key.enc_auti_hint());
3676
}
3677
}
3678
3679