GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/isa/aarch64/inst/emit.rs
1
//! AArch64 ISA: binary code emission.
2
3
use cranelift_control::ControlPlane;
4
5
use crate::ir::{self, types::*};
6
use crate::isa::aarch64::inst::*;
7
use crate::trace;
8
9
/// Memory addressing mode finalization: convert "special" modes (e.g.,
10
/// generic arbitrary stack offset) into real addressing modes, possibly by
11
/// emitting some helper instructions that come immediately before the use
12
/// of this amode.
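///
/// For example (a sketch, not a doctest; the `EmitState` setup is elided), an
/// `FPOffset` whose offset fits in a signed 9-bit immediate is rewritten to an
/// `Unscaled` amode with no helper instructions, while a larger offset is
/// materialized into the spill-temp register and used via `RegExtended`:
///
/// ```ignore
/// // Small offset: no helper instructions, amode becomes `AMode::Unscaled`.
/// let (insts, amode) = mem_finalize(None, &AMode::FPOffset { off: 16 }, I64, &state);
/// assert!(insts.is_empty());
///
/// // Large offset: `Inst::load_constant` into the spill temp, then `AMode::RegExtended`.
/// let (insts, amode) = mem_finalize(None, &AMode::FPOffset { off: 1 << 20 }, I64, &state);
/// assert!(!insts.is_empty());
/// ```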
13
pub fn mem_finalize(
14
sink: Option<&mut MachBuffer<Inst>>,
15
mem: &AMode,
16
access_ty: Type,
17
state: &EmitState,
18
) -> (SmallVec<[Inst; 4]>, AMode) {
19
match mem {
20
&AMode::RegOffset { off, .. }
21
| &AMode::SPOffset { off }
22
| &AMode::FPOffset { off }
23
| &AMode::IncomingArg { off }
24
| &AMode::SlotOffset { off } => {
25
let basereg = match mem {
26
&AMode::RegOffset { rn, .. } => rn,
27
&AMode::SPOffset { .. }
28
| &AMode::SlotOffset { .. }
29
| &AMode::IncomingArg { .. } => stack_reg(),
30
&AMode::FPOffset { .. } => fp_reg(),
31
_ => unreachable!(),
32
};
33
let off = match mem {
34
&AMode::IncomingArg { .. } => {
35
let frame_layout = state.frame_layout();
36
i64::from(
37
frame_layout.setup_area_size
38
+ frame_layout.tail_args_size
39
+ frame_layout.clobber_size
40
+ frame_layout.fixed_frame_storage_size
41
+ frame_layout.outgoing_args_size,
42
) - off
43
}
44
&AMode::SlotOffset { .. } => {
45
let adj = i64::from(state.frame_layout().outgoing_args_size);
46
trace!(
47
"mem_finalize: slot offset {} + adj {} -> {}",
48
off,
49
adj,
50
off + adj
51
);
52
off + adj
53
}
54
_ => off,
55
};
56
57
if let Some(simm9) = SImm9::maybe_from_i64(off) {
58
let mem = AMode::Unscaled { rn: basereg, simm9 };
59
(smallvec![], mem)
60
} else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, access_ty) {
61
let mem = AMode::UnsignedOffset {
62
rn: basereg,
63
uimm12,
64
};
65
(smallvec![], mem)
66
} else {
67
let tmp = writable_spilltmp_reg();
68
(
69
Inst::load_constant(tmp, off as u64),
70
AMode::RegExtended {
71
rn: basereg,
72
rm: tmp.to_reg(),
73
extendop: ExtendOp::SXTX,
74
},
75
)
76
}
77
}
78
79
AMode::Const { addr } => {
80
let sink = match sink {
81
Some(sink) => sink,
82
None => return (smallvec![], mem.clone()),
83
};
84
let label = sink.get_label_for_constant(*addr);
85
let label = MemLabel::Mach(label);
86
(smallvec![], AMode::Label { label })
87
}
88
89
_ => (smallvec![], mem.clone()),
90
}
91
}
92
93
//=============================================================================
94
// Instructions and subcomponents: emission
95
96
pub(crate) fn machreg_to_gpr(m: Reg) -> u32 {
97
assert_eq!(m.class(), RegClass::Int);
98
u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
99
}
100
101
pub(crate) fn machreg_to_vec(m: Reg) -> u32 {
102
assert_eq!(m.class(), RegClass::Float);
103
u32::from(m.to_real_reg().unwrap().hw_enc())
104
}
105
106
fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
107
u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
108
}
109
110
/// Encode a 3-register arithmetic instruction.
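///
/// Field layout: `bits_31_21` occupies bits [31:21], `rm` bits [20:16],
/// `bits_15_10` bits [15:10], `rn` bits [9:5], and `rd` bits [4:0].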
111
pub fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
112
(bits_31_21 << 21)
113
| (bits_15_10 << 10)
114
| machreg_to_gpr(rd.to_reg())
115
| (machreg_to_gpr(rn) << 5)
116
| (machreg_to_gpr(rm) << 16)
117
}
118
119
fn enc_arith_rr_imm12(
120
bits_31_24: u32,
121
immshift: u32,
122
imm12: u32,
123
rn: Reg,
124
rd: Writable<Reg>,
125
) -> u32 {
126
(bits_31_24 << 24)
127
| (immshift << 22)
128
| (imm12 << 10)
129
| (machreg_to_gpr(rn) << 5)
130
| machreg_to_gpr(rd.to_reg())
131
}
132
133
fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
134
(bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
135
}
136
137
fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
138
(top11 << 21)
139
| (machreg_to_gpr(rm) << 16)
140
| (bit15 << 15)
141
| (machreg_to_gpr(ra) << 10)
142
| (machreg_to_gpr(rn) << 5)
143
| machreg_to_gpr(rd.to_reg())
144
}
145
146
fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
147
assert!(off_26_0 < (1 << 26));
148
(op_31_26 << 26) | off_26_0
149
}
150
151
fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
152
assert!(off_18_0 < (1 << 19));
153
(op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
154
}
155
156
fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
157
assert!(off_18_0 < (1 << 19));
158
assert!(cond < (1 << 4));
159
(op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
160
}
161
162
/// Set the size bit of an instruction.
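/// The size (`sf`) bit is bit 31: it is first cleared and then set from `size`.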
163
fn enc_op_size(op: u32, size: OperandSize) -> u32 {
164
(op & !(1 << 31)) | (size.sf_bit() << 31)
165
}
166
167
fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
168
match kind {
169
CondBrKind::Zero(reg, size) => enc_op_size(
170
enc_cmpbr(0b0_011010_0, taken.as_offset19_or_zero(), reg),
171
size,
172
),
173
CondBrKind::NotZero(reg, size) => enc_op_size(
174
enc_cmpbr(0b0_011010_1, taken.as_offset19_or_zero(), reg),
175
size,
176
),
177
CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
178
}
179
}
180
181
fn enc_test_bit_and_branch(
182
kind: TestBitAndBranchKind,
183
taken: BranchTarget,
184
reg: Reg,
185
bit: u8,
186
) -> u32 {
187
assert!(bit < 64);
188
let op_31 = u32::from(bit >> 5);
189
let op_23_19 = u32::from(bit & 0b11111);
190
let op_30_24 = 0b0110110
191
| match kind {
192
TestBitAndBranchKind::Z => 0,
193
TestBitAndBranchKind::NZ => 1,
194
};
195
(op_31 << 31)
196
| (op_30_24 << 24)
197
| (op_23_19 << 19)
198
| (taken.as_offset14_or_zero() << 5)
199
| machreg_to_gpr(reg)
200
}
201
202
/// Encode a move-wide instruction.
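///
/// `MOVN`/`MOVZ` share the `0x12800000` base; the opcode goes in bits [30:29],
/// the shift (in 16-bit units) in bits [22:21], the 16-bit immediate in
/// bits [20:5], and `size.sf_bit()` in bit 31.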
203
pub fn enc_move_wide(
204
op: MoveWideOp,
205
rd: Writable<Reg>,
206
imm: MoveWideConst,
207
size: OperandSize,
208
) -> u32 {
209
assert!(imm.shift <= 0b11);
210
let op = match op {
211
MoveWideOp::MovN => 0b00,
212
MoveWideOp::MovZ => 0b10,
213
};
214
0x12800000
215
| size.sf_bit() << 31
216
| op << 29
217
| u32::from(imm.shift) << 21
218
| u32::from(imm.bits) << 5
219
| machreg_to_gpr(rd.to_reg())
220
}
221
222
/// Encode a move-keep immediate instruction.
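///
/// Same field layout as `enc_move_wide`, but with the `MOVK` opcode baked into
/// the `0x72800000` base.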
223
pub fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
224
assert!(imm.shift <= 0b11);
225
0x72800000
226
| size.sf_bit() << 31
227
| u32::from(imm.shift) << 21
228
| u32::from(imm.bits) << 5
229
| machreg_to_gpr(rd.to_reg())
230
}
231
232
fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
233
(op_31_22 << 22)
234
| (simm7.bits() << 15)
235
| (machreg_to_gpr(rt2) << 10)
236
| (machreg_to_gpr(rn) << 5)
237
| machreg_to_gpr(rt)
238
}
239
240
fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
241
(op_31_22 << 22)
242
| (simm9.bits() << 12)
243
| (op_11_10 << 10)
244
| (machreg_to_gpr(rn) << 5)
245
| machreg_to_gpr_or_vec(rd)
246
}
247
248
fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
249
(op_31_22 << 22)
250
| (0b1 << 24)
251
| (uimm12.bits() << 10)
252
| (machreg_to_gpr(rn) << 5)
253
| machreg_to_gpr_or_vec(rd)
254
}
255
256
fn enc_ldst_reg(
257
op_31_22: u32,
258
rn: Reg,
259
rm: Reg,
260
s_bit: bool,
261
extendop: Option<ExtendOp>,
262
rd: Reg,
263
) -> u32 {
264
let s_bit = if s_bit { 1 } else { 0 };
265
let extend_bits = match extendop {
266
Some(ExtendOp::UXTW) => 0b010,
267
Some(ExtendOp::SXTW) => 0b110,
268
Some(ExtendOp::SXTX) => 0b111,
269
None => 0b011, // LSL
270
_ => panic!("bad extend mode for ld/st AMode"),
271
};
272
(op_31_22 << 22)
273
| (1 << 21)
274
| (machreg_to_gpr(rm) << 16)
275
| (extend_bits << 13)
276
| (s_bit << 12)
277
| (0b10 << 10)
278
| (machreg_to_gpr(rn) << 5)
279
| machreg_to_gpr_or_vec(rd)
280
}
281
282
pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
283
(op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
284
}
285
286
fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
287
debug_assert_eq!(q & 0b1, q);
288
debug_assert_eq!(size & 0b11, size);
289
0b0_0_0011010_10_00000_110_0_00_00000_00000
290
| q << 30
291
| size << 10
292
| machreg_to_gpr(rn) << 5
293
| machreg_to_vec(rt.to_reg())
294
}
295
296
fn enc_ldst_vec_pair(
297
opc: u32,
298
amode: u32,
299
is_load: bool,
300
simm7: SImm7Scaled,
301
rn: Reg,
302
rt: Reg,
303
rt2: Reg,
304
) -> u32 {
305
debug_assert_eq!(opc & 0b11, opc);
306
debug_assert_eq!(amode & 0b11, amode);
307
308
0b00_10110_00_0_0000000_00000_00000_00000
309
| opc << 30
310
| amode << 23
311
| (is_load as u32) << 22
312
| simm7.bits() << 15
313
| machreg_to_vec(rt2) << 10
314
| machreg_to_gpr(rn) << 5
315
| machreg_to_vec(rt)
316
}
317
318
fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
319
(top11 << 21)
320
| (machreg_to_vec(rm) << 16)
321
| (bit15_10 << 10)
322
| (machreg_to_vec(rn) << 5)
323
| machreg_to_vec(rd.to_reg())
324
}
325
326
fn enc_vec_rrr_long(
327
q: u32,
328
u: u32,
329
size: u32,
330
bit14: u32,
331
rm: Reg,
332
rn: Reg,
333
rd: Writable<Reg>,
334
) -> u32 {
335
debug_assert_eq!(q & 0b1, q);
336
debug_assert_eq!(u & 0b1, u);
337
debug_assert_eq!(size & 0b11, size);
338
debug_assert_eq!(bit14 & 0b1, bit14);
339
340
0b0_0_0_01110_00_1_00000_100000_00000_00000
341
| q << 30
342
| u << 29
343
| size << 22
344
| bit14 << 14
345
| (machreg_to_vec(rm) << 16)
346
| (machreg_to_vec(rn) << 5)
347
| machreg_to_vec(rd.to_reg())
348
}
349
350
fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
351
(0b01011010110 << 21)
352
| size << 31
353
| opcode2 << 16
354
| opcode1 << 10
355
| machreg_to_gpr(rn) << 5
356
| machreg_to_gpr(rd.to_reg())
357
}
358
359
pub(crate) fn enc_br(rn: Reg) -> u32 {
360
0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
361
}
362
363
pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32 {
364
let off = u32::try_from(off).unwrap();
365
let immlo = off & 3;
366
let immhi = (off >> 2) & ((1 << 19) - 1);
367
opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
368
}
369
370
pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
371
let opcode = 0b00010000 << 24;
372
enc_adr_inst(opcode, off, rd)
373
}
374
375
pub(crate) fn enc_adrp(off: i32, rd: Writable<Reg>) -> u32 {
376
let opcode = 0b10010000 << 24;
377
enc_adr_inst(opcode, off, rd)
378
}
379
380
fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
381
debug_assert_eq!(op & 0b1, op);
382
debug_assert_eq!(o2 & 0b1, o2);
383
0b100_11010100_00000_0000_00_00000_00000
384
| (op << 30)
385
| (machreg_to_gpr(rm) << 16)
386
| (cond.bits() << 12)
387
| (o2 << 10)
388
| (machreg_to_gpr(rn) << 5)
389
| machreg_to_gpr(rd.to_reg())
390
}
391
392
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
393
0b000_11110_00_1_00000_0000_11_00000_00000
394
| (size.ftype() << 22)
395
| (machreg_to_vec(rm) << 16)
396
| (machreg_to_vec(rn) << 5)
397
| machreg_to_vec(rd.to_reg())
398
| (cond.bits() << 12)
399
}
400
401
fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 {
402
0b0_1_1_11010010_00000_0000_00_00000_0_0000
403
| size.sf_bit() << 31
404
| machreg_to_gpr(rm) << 16
405
| cond.bits() << 12
406
| machreg_to_gpr(rn) << 5
407
| nzcv.bits()
408
}
409
410
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
411
0b0_1_1_11010010_00000_0000_10_00000_0_0000
412
| size.sf_bit() << 31
413
| imm.bits() << 16
414
| cond.bits() << 12
415
| machreg_to_gpr(rn) << 5
416
| nzcv.bits()
417
}
418
419
fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
420
match size {
421
OperandSize::Size64 => {
422
debug_assert!(immr <= 63);
423
debug_assert!(imms <= 63);
424
}
425
OperandSize::Size32 => {
426
debug_assert!(immr <= 31);
427
debug_assert!(imms <= 31);
428
}
429
}
430
debug_assert_eq!(opc & 0b11, opc);
431
let n_bit = size.sf_bit();
432
0b0_00_100110_0_000000_000000_00000_00000
433
| size.sf_bit() << 31
434
| u32::from(opc) << 29
435
| n_bit << 22
436
| u32::from(immr) << 16
437
| u32::from(imms) << 10
438
| machreg_to_gpr(rn) << 5
439
| machreg_to_gpr(rd.to_reg())
440
}
441
442
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
443
0b00001110_101_00000_00011_1_00000_00000
444
| ((is_16b as u32) << 30)
445
| machreg_to_vec(rd.to_reg())
446
| (machreg_to_vec(rn) << 16)
447
| (machreg_to_vec(rn) << 5)
448
}
449
450
fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
451
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
452
}
453
454
fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
455
(top22 << 10)
456
| (machreg_to_vec(rm) << 16)
457
| (machreg_to_vec(rn) << 5)
458
| machreg_to_vec(rd.to_reg())
459
}
460
461
fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
462
(top17 << 15)
463
| (machreg_to_vec(rm) << 16)
464
| (machreg_to_vec(ra) << 10)
465
| (machreg_to_vec(rn) << 5)
466
| machreg_to_vec(rd.to_reg())
467
}
468
469
fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
470
0b000_11110_00_1_00000_00_1000_00000_00000
471
| (size.ftype() << 22)
472
| (machreg_to_vec(rm) << 16)
473
| (machreg_to_vec(rn) << 5)
474
}
475
476
fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
477
(top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
478
}
479
480
fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
481
(top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
482
}
483
484
fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
485
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
486
}
487
488
fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
489
debug_assert_eq!(qu & 0b11, qu);
490
debug_assert_eq!(size & 0b11, size);
491
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
492
let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
493
bits | qu << 29
494
| size << 22
495
| bits_12_16 << 12
496
| machreg_to_vec(rn) << 5
497
| machreg_to_vec(rd.to_reg())
498
}
499
500
fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
501
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
502
503
0b010_11110_11_11000_11011_10_00000_00000
504
| bits_12_16 << 12
505
| machreg_to_vec(rn) << 5
506
| machreg_to_vec(rd.to_reg())
507
}
508
509
fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
510
debug_assert_eq!(u & 0b1, u);
511
debug_assert_eq!(enc_size & 0b1, enc_size);
512
513
0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
514
| u << 29
515
| enc_size << 22
516
| machreg_to_vec(rn) << 5
517
| machreg_to_vec(rd.to_reg())
518
}
519
520
fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
521
debug_assert_eq!(q & 0b1, q);
522
debug_assert_eq!(u & 0b1, u);
523
debug_assert_eq!(size & 0b11, size);
524
debug_assert_eq!(opcode & 0b11111, opcode);
525
0b0_0_0_01110_00_11000_0_0000_10_00000_00000
526
| q << 30
527
| u << 29
528
| size << 22
529
| opcode << 12
530
| machreg_to_vec(rn) << 5
531
| machreg_to_vec(rd.to_reg())
532
}
533
534
fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
535
debug_assert_eq!(len & 0b11, len);
536
0b0_1_001110_000_00000_0_00_0_00_00000_00000
537
| (machreg_to_vec(rm) << 16)
538
| len << 13
539
| (is_extension as u32) << 12
540
| (machreg_to_vec(rn) << 5)
541
| machreg_to_vec(rd.to_reg())
542
}
543
544
fn enc_dmb_ish() -> u32 {
545
0xD5033BBF
546
}
547
548
fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
549
assert!(machreg_to_gpr(rt.to_reg()) != 31);
550
let sz = match ty {
551
I64 => 0b11,
552
I32 => 0b10,
553
I16 => 0b01,
554
I8 => 0b00,
555
_ => unreachable!(),
556
};
557
let bit15 = match op {
558
AtomicRMWOp::Swp => 0b1,
559
_ => 0b0,
560
};
561
let op = match op {
562
AtomicRMWOp::Add => 0b000,
563
AtomicRMWOp::Clr => 0b001,
564
AtomicRMWOp::Eor => 0b010,
565
AtomicRMWOp::Set => 0b011,
566
AtomicRMWOp::Smax => 0b100,
567
AtomicRMWOp::Smin => 0b101,
568
AtomicRMWOp::Umax => 0b110,
569
AtomicRMWOp::Umin => 0b111,
570
AtomicRMWOp::Swp => 0b000,
571
};
572
0b00_111_000_111_00000_0_000_00_00000_00000
573
| (sz << 30)
574
| (machreg_to_gpr(rs) << 16)
575
| bit15 << 15
576
| (op << 12)
577
| (machreg_to_gpr(rn) << 5)
578
| machreg_to_gpr(rt.to_reg())
579
}
580
581
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
582
let sz = match ty {
583
I64 => 0b11,
584
I32 => 0b10,
585
I16 => 0b01,
586
I8 => 0b00,
587
_ => unreachable!(),
588
};
589
0b00_001000_1_1_0_11111_1_11111_00000_00000
590
| (sz << 30)
591
| (machreg_to_gpr(rn) << 5)
592
| machreg_to_gpr(rt.to_reg())
593
}
594
595
fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
596
let sz = match ty {
597
I64 => 0b11,
598
I32 => 0b10,
599
I16 => 0b01,
600
I8 => 0b00,
601
_ => unreachable!(),
602
};
603
0b00_001000_100_11111_1_11111_00000_00000
604
| (sz << 30)
605
| (machreg_to_gpr(rn) << 5)
606
| machreg_to_gpr(rt)
607
}
608
609
fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
610
let sz = match ty {
611
I64 => 0b11,
612
I32 => 0b10,
613
I16 => 0b01,
614
I8 => 0b00,
615
_ => unreachable!(),
616
};
617
0b00_001000_0_1_0_11111_1_11111_00000_00000
618
| (sz << 30)
619
| (machreg_to_gpr(rn) << 5)
620
| machreg_to_gpr(rt.to_reg())
621
}
622
623
fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
624
let sz = match ty {
625
I64 => 0b11,
626
I32 => 0b10,
627
I16 => 0b01,
628
I8 => 0b00,
629
_ => unreachable!(),
630
};
631
0b00_001000_000_00000_1_11111_00000_00000
632
| (sz << 30)
633
| (machreg_to_gpr(rs.to_reg()) << 16)
634
| (machreg_to_gpr(rn) << 5)
635
| machreg_to_gpr(rt)
636
}
637
638
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
639
debug_assert_eq!(size & 0b11, size);
640
641
0b00_0010001_1_1_00000_1_11111_00000_00000
642
| size << 30
643
| machreg_to_gpr(rs.to_reg()) << 16
644
| machreg_to_gpr(rn) << 5
645
| machreg_to_gpr(rt)
646
}
647
648
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
649
let abc = (imm >> 5) as u32;
650
let defgh = (imm & 0b11111) as u32;
651
652
debug_assert_eq!(cmode & 0b1111, cmode);
653
debug_assert_eq!(q_op & 0b11, q_op);
654
655
0b0_0_0_0111100000_000_0000_01_00000_00000
656
| (q_op << 29)
657
| (abc << 16)
658
| (cmode << 12)
659
| (defgh << 5)
660
| machreg_to_vec(rd.to_reg())
661
}
662
663
/// State carried between emissions of a sequence of instructions.
664
#[derive(Default, Clone, Debug)]
665
pub struct EmitState {
666
/// The user stack map for the upcoming instruction, as provided to
667
/// `pre_safepoint()`.
668
user_stack_map: Option<ir::UserStackMap>,
669
670
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
671
/// optimized away at compile time. See [cranelift_control].
672
ctrl_plane: ControlPlane,
673
674
frame_layout: FrameLayout,
675
}
676
677
impl MachInstEmitState<Inst> for EmitState {
678
fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {
679
EmitState {
680
user_stack_map: None,
681
ctrl_plane,
682
frame_layout: abi.frame_layout().clone(),
683
}
684
}
685
686
fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
687
self.user_stack_map = user_stack_map;
688
}
689
690
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
691
&mut self.ctrl_plane
692
}
693
694
fn take_ctrl_plane(self) -> ControlPlane {
695
self.ctrl_plane
696
}
697
698
fn frame_layout(&self) -> &FrameLayout {
699
&self.frame_layout
700
}
701
}
702
703
impl EmitState {
704
fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
705
self.user_stack_map.take()
706
}
707
708
fn clear_post_insn(&mut self) {
709
self.user_stack_map = None;
710
}
711
}
712
713
/// Constant state used during function compilation.
714
pub struct EmitInfo(settings::Flags);
715
716
impl EmitInfo {
717
/// Create a constant state for emission of instructions.
718
pub fn new(flags: settings::Flags) -> Self {
719
Self(flags)
720
}
721
}
722
723
impl MachInstEmit for Inst {
724
type State = EmitState;
725
type Info = EmitInfo;
726
727
fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
728
// N.B.: we *must* not exceed the "worst-case size" used to compute
729
// where to insert islands, except when islands are explicitly triggered
730
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
731
// to allow disabling the check for `JTSequence`, which is always
732
// emitted following an `EmitIsland`.
733
let mut start_off = sink.cur_offset();
734
735
match self {
736
&Inst::AluRRR {
737
alu_op,
738
size,
739
rd,
740
rn,
741
rm,
742
} => {
743
debug_assert!(match alu_op {
744
ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64,
745
_ => true,
746
});
747
let top11 = match alu_op {
748
ALUOp::Add => 0b00001011_000,
749
ALUOp::Adc => 0b00011010_000,
750
ALUOp::AdcS => 0b00111010_000,
751
ALUOp::Sub => 0b01001011_000,
752
ALUOp::Sbc => 0b01011010_000,
753
ALUOp::SbcS => 0b01111010_000,
754
ALUOp::Orr => 0b00101010_000,
755
ALUOp::And => 0b00001010_000,
756
ALUOp::AndS => 0b01101010_000,
757
ALUOp::Eor => 0b01001010_000,
758
ALUOp::OrrNot => 0b00101010_001,
759
ALUOp::AndNot => 0b00001010_001,
760
ALUOp::EorNot => 0b01001010_001,
761
ALUOp::AddS => 0b00101011_000,
762
ALUOp::SubS => 0b01101011_000,
763
ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,
764
ALUOp::Extr | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
765
ALUOp::SMulH => 0b10011011_010,
766
ALUOp::UMulH => 0b10011011_110,
767
};
768
769
let top11 = top11 | size.sf_bit() << 10;
770
let bit15_10 = match alu_op {
771
ALUOp::SDiv => 0b000011,
772
ALUOp::UDiv => 0b000010,
773
ALUOp::Extr => 0b001011,
774
ALUOp::Lsr => 0b001001,
775
ALUOp::Asr => 0b001010,
776
ALUOp::Lsl => 0b001000,
777
ALUOp::SMulH | ALUOp::UMulH => 0b011111,
778
_ => 0b000000,
779
};
780
debug_assert_ne!(writable_stack_reg(), rd);
781
// The stack pointer is the zero register in this context, so this might be an
782
// indication that something is wrong.
783
debug_assert_ne!(stack_reg(), rn);
784
debug_assert_ne!(stack_reg(), rm);
785
sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
786
}
787
&Inst::AluRRRR {
788
alu_op,
789
size,
790
rd,
791
rm,
792
rn,
793
ra,
794
} => {
795
let (top11, bit15) = match alu_op {
796
ALUOp3::MAdd => (0b0_00_11011_000, 0),
797
ALUOp3::MSub => (0b0_00_11011_000, 1),
798
ALUOp3::UMAddL => {
799
debug_assert!(size == OperandSize::Size32);
800
(0b1_00_11011_1_01, 0)
801
}
802
ALUOp3::SMAddL => {
803
debug_assert!(size == OperandSize::Size32);
804
(0b1_00_11011_0_01, 0)
805
}
806
};
807
let top11 = top11 | size.sf_bit() << 10;
808
sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
809
}
810
&Inst::AluRRImm12 {
811
alu_op,
812
size,
813
rd,
814
rn,
815
ref imm12,
816
} => {
817
let top8 = match alu_op {
818
ALUOp::Add => 0b000_10001,
819
ALUOp::Sub => 0b010_10001,
820
ALUOp::AddS => 0b001_10001,
821
ALUOp::SubS => 0b011_10001,
822
_ => unimplemented!("{:?}", alu_op),
823
};
824
let top8 = top8 | size.sf_bit() << 7;
825
sink.put4(enc_arith_rr_imm12(
826
top8,
827
imm12.shift_bits(),
828
imm12.imm_bits(),
829
rn,
830
rd,
831
));
832
}
833
&Inst::AluRRImmLogic {
834
alu_op,
835
size,
836
rd,
837
rn,
838
ref imml,
839
} => {
840
let (top9, inv) = match alu_op {
841
ALUOp::Orr => (0b001_100100, false),
842
ALUOp::And => (0b000_100100, false),
843
ALUOp::AndS => (0b011_100100, false),
844
ALUOp::Eor => (0b010_100100, false),
845
ALUOp::OrrNot => (0b001_100100, true),
846
ALUOp::AndNot => (0b000_100100, true),
847
ALUOp::EorNot => (0b010_100100, true),
848
_ => unimplemented!("{:?}", alu_op),
849
};
850
let top9 = top9 | size.sf_bit() << 8;
851
let imml = if inv { imml.invert() } else { *imml };
852
sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
853
}
854
855
&Inst::AluRRImmShift {
856
alu_op,
857
size,
858
rd,
859
rn,
860
ref immshift,
861
} => {
862
let amt = immshift.value();
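// LSR and LSL are encoded as aliases of UBFM (and ASR of SBFM); for
// `Extr`, the second register goes in the bits that otherwise hold
// `immr`, so this encodes `extr rd, rn, rn, #amt` (a rotate right).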
863
let (top10, immr, imms) = match alu_op {
864
ALUOp::Extr => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
865
ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111),
866
ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111),
867
ALUOp::Lsl => {
868
let bits = if size.is64() { 64 } else { 32 };
869
(
870
0b0101001100,
871
u32::from((bits - amt) % bits),
872
u32::from(bits - 1 - amt),
873
)
874
}
875
_ => unimplemented!("{:?}", alu_op),
876
};
877
let top10 = top10 | size.sf_bit() << 9 | size.sf_bit();
878
let imms = match alu_op {
879
ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5,
880
_ => imms,
881
};
882
sink.put4(
883
(top10 << 22)
884
| (immr << 16)
885
| (imms << 10)
886
| (machreg_to_gpr(rn) << 5)
887
| machreg_to_gpr(rd.to_reg()),
888
);
889
}
890
891
&Inst::AluRRRShift {
892
alu_op,
893
size,
894
rd,
895
rn,
896
rm,
897
ref shiftop,
898
} => {
899
let top11: u32 = match alu_op {
900
ALUOp::Add => 0b000_01011000,
901
ALUOp::AddS => 0b001_01011000,
902
ALUOp::Sub => 0b010_01011000,
903
ALUOp::SubS => 0b011_01011000,
904
ALUOp::Orr => 0b001_01010000,
905
ALUOp::And => 0b000_01010000,
906
ALUOp::AndS => 0b011_01010000,
907
ALUOp::Eor => 0b010_01010000,
908
ALUOp::OrrNot => 0b001_01010001,
909
ALUOp::EorNot => 0b010_01010001,
910
ALUOp::AndNot => 0b000_01010001,
911
ALUOp::Extr => 0b000_10011100,
912
_ => unimplemented!("{:?}", alu_op),
913
};
914
let top11 = top11 | size.sf_bit() << 10;
915
let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
916
let bits_15_10 = u32::from(shiftop.amt().value());
917
sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
918
}
919
920
&Inst::AluRRRExtend {
921
alu_op,
922
size,
923
rd,
924
rn,
925
rm,
926
extendop,
927
} => {
928
let top11: u32 = match alu_op {
929
ALUOp::Add => 0b00001011001,
930
ALUOp::Sub => 0b01001011001,
931
ALUOp::AddS => 0b00101011001,
932
ALUOp::SubS => 0b01101011001,
933
_ => unimplemented!("{:?}", alu_op),
934
};
935
let top11 = top11 | size.sf_bit() << 10;
936
let bits_15_10 = u32::from(extendop.bits()) << 3;
937
sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
938
}
939
940
&Inst::BitRR {
941
op, size, rd, rn, ..
942
} => {
943
let (op1, op2) = match op {
944
BitOp::RBit => (0b00000, 0b000000),
945
BitOp::Clz => (0b00000, 0b000100),
946
BitOp::Cls => (0b00000, 0b000101),
947
BitOp::Rev16 => (0b00000, 0b000001),
948
BitOp::Rev32 => (0b00000, 0b000010),
949
BitOp::Rev64 => (0b00000, 0b000011),
950
};
951
sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
952
}
953
954
&Inst::ULoad8 { rd, ref mem, flags }
955
| &Inst::SLoad8 { rd, ref mem, flags }
956
| &Inst::ULoad16 { rd, ref mem, flags }
957
| &Inst::SLoad16 { rd, ref mem, flags }
958
| &Inst::ULoad32 { rd, ref mem, flags }
959
| &Inst::SLoad32 { rd, ref mem, flags }
960
| &Inst::ULoad64 {
961
rd, ref mem, flags, ..
962
}
963
| &Inst::FpuLoad16 { rd, ref mem, flags }
964
| &Inst::FpuLoad32 { rd, ref mem, flags }
965
| &Inst::FpuLoad64 { rd, ref mem, flags }
966
| &Inst::FpuLoad128 { rd, ref mem, flags } => {
967
let mem = mem.clone();
968
let access_ty = self.mem_type().unwrap();
969
let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);
970
971
for inst in mem_insts.into_iter() {
972
inst.emit(sink, emit_info, state);
973
}
974
975
// ldst encoding helpers take Reg, not Writable<Reg>.
976
let rd = rd.to_reg();
977
978
// This is the base opcode (top 10 bits) for the "unscaled
979
// immediate" form (Unscaled). Other addressing modes will OR in
980
// other values for bits 24/25 (bits 1/2 of this constant).
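// (For instance, `enc_ldst_uimm12` ORs in bit 24 to select the
// unsigned-offset form.)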
981
let op = match self {
982
Inst::ULoad8 { .. } => 0b0011100001,
983
Inst::SLoad8 { .. } => 0b0011100010,
984
Inst::ULoad16 { .. } => 0b0111100001,
985
Inst::SLoad16 { .. } => 0b0111100010,
986
Inst::ULoad32 { .. } => 0b1011100001,
987
Inst::SLoad32 { .. } => 0b1011100010,
988
Inst::ULoad64 { .. } => 0b1111100001,
989
Inst::FpuLoad16 { .. } => 0b0111110001,
990
Inst::FpuLoad32 { .. } => 0b1011110001,
991
Inst::FpuLoad64 { .. } => 0b1111110001,
992
Inst::FpuLoad128 { .. } => 0b0011110011,
993
_ => unreachable!(),
994
};
995
996
if let Some(trap_code) = flags.trap_code() {
997
// Register the offset at which the actual load instruction starts.
998
sink.add_trap(trap_code);
999
}
1000
1001
match &mem {
1002
&AMode::Unscaled { rn, simm9 } => {
1003
let reg = rn;
1004
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
1005
}
1006
&AMode::UnsignedOffset { rn, uimm12 } => {
1007
let reg = rn;
1008
sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
1009
}
1010
&AMode::RegReg { rn, rm } => {
1011
let r1 = rn;
1012
let r2 = rm;
1013
sink.put4(enc_ldst_reg(
1014
op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
1015
));
1016
}
1017
&AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
1018
let r1 = rn;
1019
let r2 = rm;
1020
let extendop = match &mem {
1021
&AMode::RegScaled { .. } => None,
1022
&AMode::RegScaledExtended { extendop, .. } => Some(extendop),
1023
_ => unreachable!(),
1024
};
1025
sink.put4(enc_ldst_reg(
1026
op, r1, r2, /* scaled = */ true, extendop, rd,
1027
));
1028
}
1029
&AMode::RegExtended { rn, rm, extendop } => {
1030
let r1 = rn;
1031
let r2 = rm;
1032
sink.put4(enc_ldst_reg(
1033
op,
1034
r1,
1035
r2,
1036
/* scaled = */ false,
1037
Some(extendop),
1038
rd,
1039
));
1040
}
1041
&AMode::Label { ref label } => {
1042
let offset = match label {
1043
// cast i32 to u32 (two's-complement)
1044
MemLabel::PCRel(off) => *off as u32,
1045
// Emit a relocation into the `MachBuffer`
1046
// for the label that's being loaded from and
1047
// encode an address of 0 in its place which will
1048
// get filled in by relocation resolution later on.
1049
MemLabel::Mach(label) => {
1050
sink.use_label_at_offset(
1051
sink.cur_offset(),
1052
*label,
1053
LabelUse::Ldr19,
1054
);
1055
0
1056
}
1057
} / 4;
1058
assert!(offset < (1 << 19));
1059
match self {
1060
&Inst::ULoad32 { .. } => {
1061
sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
1062
}
1063
&Inst::SLoad32 { .. } => {
1064
sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
1065
}
1066
&Inst::FpuLoad32 { .. } => {
1067
sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
1068
}
1069
&Inst::ULoad64 { .. } => {
1070
sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
1071
}
1072
&Inst::FpuLoad64 { .. } => {
1073
sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
1074
}
1075
&Inst::FpuLoad128 { .. } => {
1076
sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
1077
}
1078
_ => panic!("Unsupported size for LDR from constant pool!"),
1079
}
1080
}
1081
&AMode::SPPreIndexed { simm9 } => {
1082
let reg = stack_reg();
1083
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
1084
}
1085
&AMode::SPPostIndexed { simm9 } => {
1086
let reg = stack_reg();
1087
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
1088
}
1089
// Eliminated by `mem_finalize()` above.
1090
&AMode::SPOffset { .. }
1091
| &AMode::FPOffset { .. }
1092
| &AMode::IncomingArg { .. }
1093
| &AMode::SlotOffset { .. }
1094
| &AMode::Const { .. }
1095
| &AMode::RegOffset { .. } => {
1096
panic!("Should not see {mem:?} here!")
1097
}
1098
}
1099
}
1100
1101
&Inst::Store8 { rd, ref mem, flags }
1102
| &Inst::Store16 { rd, ref mem, flags }
1103
| &Inst::Store32 { rd, ref mem, flags }
1104
| &Inst::Store64 { rd, ref mem, flags }
1105
| &Inst::FpuStore16 { rd, ref mem, flags }
1106
| &Inst::FpuStore32 { rd, ref mem, flags }
1107
| &Inst::FpuStore64 { rd, ref mem, flags }
1108
| &Inst::FpuStore128 { rd, ref mem, flags } => {
1109
let mem = mem.clone();
1110
let access_ty = self.mem_type().unwrap();
1111
let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);
1112
1113
for inst in mem_insts.into_iter() {
1114
inst.emit(sink, emit_info, state);
1115
}
1116
1117
let op = match self {
1118
Inst::Store8 { .. } => 0b0011100000,
1119
Inst::Store16 { .. } => 0b0111100000,
1120
Inst::Store32 { .. } => 0b1011100000,
1121
Inst::Store64 { .. } => 0b1111100000,
1122
Inst::FpuStore16 { .. } => 0b0111110000,
1123
Inst::FpuStore32 { .. } => 0b1011110000,
1124
Inst::FpuStore64 { .. } => 0b1111110000,
1125
Inst::FpuStore128 { .. } => 0b0011110010,
1126
_ => unreachable!(),
1127
};
1128
1129
if let Some(trap_code) = flags.trap_code() {
1130
// Register the offset at which the actual store instruction starts.
1131
sink.add_trap(trap_code);
1132
}
1133
1134
match &mem {
1135
&AMode::Unscaled { rn, simm9 } => {
1136
let reg = rn;
1137
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
1138
}
1139
&AMode::UnsignedOffset { rn, uimm12 } => {
1140
let reg = rn;
1141
sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
1142
}
1143
&AMode::RegReg { rn, rm } => {
1144
let r1 = rn;
1145
let r2 = rm;
1146
sink.put4(enc_ldst_reg(
1147
op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
1148
));
1149
}
1150
&AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
1151
let r1 = rn;
1152
let r2 = rm;
1153
let extendop = match &mem {
1154
&AMode::RegScaled { .. } => None,
1155
&AMode::RegScaledExtended { extendop, .. } => Some(extendop),
1156
_ => unreachable!(),
1157
};
1158
sink.put4(enc_ldst_reg(
1159
op, r1, r2, /* scaled = */ true, extendop, rd,
1160
));
1161
}
1162
&AMode::RegExtended { rn, rm, extendop } => {
1163
let r1 = rn;
1164
let r2 = rm;
1165
sink.put4(enc_ldst_reg(
1166
op,
1167
r1,
1168
r2,
1169
/* scaled = */ false,
1170
Some(extendop),
1171
rd,
1172
));
1173
}
1174
&AMode::Label { .. } => {
1175
panic!("Store to a MemLabel not implemented!");
1176
}
1177
&AMode::SPPreIndexed { simm9 } => {
1178
let reg = stack_reg();
1179
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
1180
}
1181
&AMode::SPPostIndexed { simm9 } => {
1182
let reg = stack_reg();
1183
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
1184
}
1185
// Eliminated by `mem_finalize()` above.
1186
&AMode::SPOffset { .. }
1187
| &AMode::FPOffset { .. }
1188
| &AMode::IncomingArg { .. }
1189
| &AMode::SlotOffset { .. }
1190
| &AMode::Const { .. }
1191
| &AMode::RegOffset { .. } => {
1192
panic!("Should not see {mem:?} here!")
1193
}
1194
}
1195
}
1196
1197
&Inst::StoreP64 {
1198
rt,
1199
rt2,
1200
ref mem,
1201
flags,
1202
} => {
1203
let mem = mem.clone();
1204
if let Some(trap_code) = flags.trap_code() {
1205
// Register the offset at which the actual store instruction starts.
1206
sink.add_trap(trap_code);
1207
}
1208
match &mem {
1209
&PairAMode::SignedOffset { reg, simm7 } => {
1210
assert_eq!(simm7.scale_ty, I64);
1211
sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
1212
}
1213
&PairAMode::SPPreIndexed { simm7 } => {
1214
assert_eq!(simm7.scale_ty, I64);
1215
let reg = stack_reg();
1216
sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
1217
}
1218
&PairAMode::SPPostIndexed { simm7 } => {
1219
assert_eq!(simm7.scale_ty, I64);
1220
let reg = stack_reg();
1221
sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
1222
}
1223
}
1224
}
1225
&Inst::LoadP64 {
1226
rt,
1227
rt2,
1228
ref mem,
1229
flags,
1230
} => {
1231
let rt = rt.to_reg();
1232
let rt2 = rt2.to_reg();
1233
let mem = mem.clone();
1234
if let Some(trap_code) = flags.trap_code() {
1235
// Register the offset at which the actual load instruction starts.
1236
sink.add_trap(trap_code);
1237
}
1238
1239
match &mem {
1240
&PairAMode::SignedOffset { reg, simm7 } => {
1241
assert_eq!(simm7.scale_ty, I64);
1242
sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
1243
}
1244
&PairAMode::SPPreIndexed { simm7 } => {
1245
assert_eq!(simm7.scale_ty, I64);
1246
let reg = stack_reg();
1247
sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
1248
}
1249
&PairAMode::SPPostIndexed { simm7 } => {
1250
assert_eq!(simm7.scale_ty, I64);
1251
let reg = stack_reg();
1252
sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
1253
}
1254
}
1255
}
1256
&Inst::FpuLoadP64 {
1257
rt,
1258
rt2,
1259
ref mem,
1260
flags,
1261
}
1262
| &Inst::FpuLoadP128 {
1263
rt,
1264
rt2,
1265
ref mem,
1266
flags,
1267
} => {
1268
let rt = rt.to_reg();
1269
let rt2 = rt2.to_reg();
1270
let mem = mem.clone();
1271
1272
if let Some(trap_code) = flags.trap_code() {
1273
// Register the offset at which the actual load instruction starts.
1274
sink.add_trap(trap_code);
1275
}
1276
1277
let opc = match self {
1278
&Inst::FpuLoadP64 { .. } => 0b01,
1279
&Inst::FpuLoadP128 { .. } => 0b10,
1280
_ => unreachable!(),
1281
};
1282
1283
match &mem {
1284
&PairAMode::SignedOffset { reg, simm7 } => {
1285
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1286
sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
1287
}
1288
&PairAMode::SPPreIndexed { simm7 } => {
1289
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1290
let reg = stack_reg();
1291
sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
1292
}
1293
&PairAMode::SPPostIndexed { simm7 } => {
1294
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1295
let reg = stack_reg();
1296
sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
1297
}
1298
}
1299
}
1300
&Inst::FpuStoreP64 {
1301
rt,
1302
rt2,
1303
ref mem,
1304
flags,
1305
}
1306
| &Inst::FpuStoreP128 {
1307
rt,
1308
rt2,
1309
ref mem,
1310
flags,
1311
} => {
1312
let mem = mem.clone();
1313
1314
if let Some(trap_code) = flags.trap_code() {
1315
// Register the offset at which the actual store instruction starts.
1316
sink.add_trap(trap_code);
1317
}
1318
1319
let opc = match self {
1320
&Inst::FpuStoreP64 { .. } => 0b01,
1321
&Inst::FpuStoreP128 { .. } => 0b10,
1322
_ => unreachable!(),
1323
};
1324
1325
match &mem {
1326
&PairAMode::SignedOffset { reg, simm7 } => {
1327
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1328
sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
1329
}
1330
&PairAMode::SPPreIndexed { simm7 } => {
1331
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1332
let reg = stack_reg();
1333
sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
1334
}
1335
&PairAMode::SPPostIndexed { simm7 } => {
1336
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1337
let reg = stack_reg();
1338
sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
1339
}
1340
}
1341
}
1342
&Inst::Mov { size, rd, rm } => {
1343
assert!(rd.to_reg().class() == rm.class());
1344
assert!(rm.class() == RegClass::Int);
1345
1346
match size {
1347
OperandSize::Size64 => {
1348
// MOV to SP is interpreted as MOV to XZR instead. And our codegen
1349
// should never MOV to XZR.
1350
assert!(rd.to_reg() != stack_reg());
1351
1352
if rm == stack_reg() {
1353
// We can't use ORR here, so use an `add rd, sp, #0` instead.
1354
let imm12 = Imm12::maybe_from_u64(0).unwrap();
1355
sink.put4(enc_arith_rr_imm12(
1356
0b100_10001,
1357
imm12.shift_bits(),
1358
imm12.imm_bits(),
1359
rm,
1360
rd,
1361
));
1362
} else {
1363
// Encoded as ORR rd, rm, zero.
1364
sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
1365
}
1366
}
1367
OperandSize::Size32 => {
1368
// MOV to SP is interpreted as MOV to XZR instead. And our codegen
1369
// should never MOV to XZR.
1370
assert!(machreg_to_gpr(rd.to_reg()) != 31);
1371
// Encoded as ORR rd, rm, zero.
1372
sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
1373
}
1374
}
1375
}
1376
&Inst::MovFromPReg { rd, rm } => {
1377
let rm: Reg = rm.into();
1378
debug_assert!(
1379
[
1380
regs::fp_reg(),
1381
regs::stack_reg(),
1382
regs::link_reg(),
1383
regs::pinned_reg()
1384
]
1385
.contains(&rm)
1386
);
1387
assert!(rm.class() == RegClass::Int);
1388
assert!(rd.to_reg().class() == rm.class());
1389
let size = OperandSize::Size64;
1390
Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
1391
}
1392
&Inst::MovToPReg { rd, rm } => {
1393
let rd: Writable<Reg> = Writable::from_reg(rd.into());
1394
debug_assert!(
1395
[
1396
regs::fp_reg(),
1397
regs::stack_reg(),
1398
regs::link_reg(),
1399
regs::pinned_reg()
1400
]
1401
.contains(&rd.to_reg())
1402
);
1403
assert!(rd.to_reg().class() == RegClass::Int);
1404
assert!(rm.class() == rd.to_reg().class());
1405
let size = OperandSize::Size64;
1406
Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
1407
}
1408
&Inst::MovWide { op, rd, imm, size } => {
1409
sink.put4(enc_move_wide(op, rd, imm, size));
1410
}
1411
&Inst::MovK { rd, rn, imm, size } => {
1412
debug_assert_eq!(rn, rd.to_reg());
1413
sink.put4(enc_movk(rd, imm, size));
1414
}
1415
&Inst::CSel { rd, rn, rm, cond } => {
1416
sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
1417
}
1418
&Inst::CSNeg { rd, rn, rm, cond } => {
1419
sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
1420
}
1421
&Inst::CSet { rd, cond } => {
1422
sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
1423
}
1424
&Inst::CSetm { rd, cond } => {
1425
sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
1426
}
1427
&Inst::CCmp {
1428
size,
1429
rn,
1430
rm,
1431
nzcv,
1432
cond,
1433
} => {
1434
sink.put4(enc_ccmp(size, rn, rm, nzcv, cond));
1435
}
1436
&Inst::CCmpImm {
1437
size,
1438
rn,
1439
imm,
1440
nzcv,
1441
cond,
1442
} => {
1443
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
1444
}
1445
&Inst::AtomicRMW {
1446
ty,
1447
op,
1448
rs,
1449
rt,
1450
rn,
1451
flags,
1452
} => {
1453
if let Some(trap_code) = flags.trap_code() {
1454
sink.add_trap(trap_code);
1455
}
1456
1457
sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
1458
}
1459
&Inst::AtomicRMWLoop { ty, op, flags, .. } => {
1460
/* Emit this:
1461
again:
1462
ldaxr{,b,h} x/w27, [x25]
1463
// maybe sign extend
1464
op x28, x27, x26 // op is add,sub,and,orr,eor
1465
stlxr{,b,h} w24, x/w28, [x25]
1466
cbnz x24, again
1467
1468
Operand conventions:
1469
IN: x25 (addr), x26 (2nd arg for op)
1470
OUT: x27 (old value), x24 (trashed), x28 (trashed)
1471
1472
It is unfortunate that, per the ARM documentation, x28 cannot be used for
1473
both the store-data and success-flag operands of stlxr. This causes the
1474
instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
1475
instead for the success-flag.
1476
*/
1477
// TODO: We should not hardcode registers here, a better idea would be to
1478
// pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those
1479
let xzr = zero_reg();
1480
let x24 = xreg(24);
1481
let x25 = xreg(25);
1482
let x26 = xreg(26);
1483
let x27 = xreg(27);
1484
let x28 = xreg(28);
1485
let x24wr = writable_xreg(24);
1486
let x27wr = writable_xreg(27);
1487
let x28wr = writable_xreg(28);
1488
let again_label = sink.get_label();
1489
1490
// again:
1491
sink.bind_label(again_label, &mut state.ctrl_plane);
1492
1493
if let Some(trap_code) = flags.trap_code() {
1494
sink.add_trap(trap_code);
1495
}
1496
1497
sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
1498
let size = OperandSize::from_ty(ty);
1499
let sign_ext = match op {
1500
AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
1501
I16 => Some((ExtendOp::SXTH, 16)),
1502
I8 => Some((ExtendOp::SXTB, 8)),
1503
_ => None,
1504
},
1505
_ => None,
1506
};
1507
1508
// sxt{b|h} the loaded result if necessary.
1509
if sign_ext.is_some() {
1510
let (_, from_bits) = sign_ext.unwrap();
1511
Inst::Extend {
1512
rd: x27wr,
1513
rn: x27,
1514
signed: true,
1515
from_bits,
1516
to_bits: size.bits(),
1517
}
1518
.emit(sink, emit_info, state);
1519
}
1520
1521
match op {
1522
AtomicRMWLoopOp::Xchg => {} // do nothing
1523
AtomicRMWLoopOp::Nand => {
1524
// and x28, x27, x26
1525
// mvn x28, x28
1526
1527
Inst::AluRRR {
1528
alu_op: ALUOp::And,
1529
size,
1530
rd: x28wr,
1531
rn: x27,
1532
rm: x26,
1533
}
1534
.emit(sink, emit_info, state);
1535
1536
Inst::AluRRR {
1537
alu_op: ALUOp::OrrNot,
1538
size,
1539
rd: x28wr,
1540
rn: xzr,
1541
rm: x28,
1542
}
1543
.emit(sink, emit_info, state);
1544
}
1545
AtomicRMWLoopOp::Umin
1546
| AtomicRMWLoopOp::Umax
1547
| AtomicRMWLoopOp::Smin
1548
| AtomicRMWLoopOp::Smax => {
1549
// cmp x27, x26 {?sxt}
1550
// csel.op x28, x27, x26
1551
1552
let cond = match op {
1553
AtomicRMWLoopOp::Umin => Cond::Lo,
1554
AtomicRMWLoopOp::Umax => Cond::Hi,
1555
AtomicRMWLoopOp::Smin => Cond::Lt,
1556
AtomicRMWLoopOp::Smax => Cond::Gt,
1557
_ => unreachable!(),
1558
};
1559
1560
if sign_ext.is_some() {
1561
let (extendop, _) = sign_ext.unwrap();
1562
Inst::AluRRRExtend {
1563
alu_op: ALUOp::SubS,
1564
size,
1565
rd: writable_zero_reg(),
1566
rn: x27,
1567
rm: x26,
1568
extendop,
1569
}
1570
.emit(sink, emit_info, state);
1571
} else {
1572
Inst::AluRRR {
1573
alu_op: ALUOp::SubS,
1574
size,
1575
rd: writable_zero_reg(),
1576
rn: x27,
1577
rm: x26,
1578
}
1579
.emit(sink, emit_info, state);
1580
}
1581
1582
Inst::CSel {
1583
cond,
1584
rd: x28wr,
1585
rn: x27,
1586
rm: x26,
1587
}
1588
.emit(sink, emit_info, state);
1589
}
1590
_ => {
1591
// add/sub/and/orr/eor x28, x27, x26
1592
let alu_op = match op {
1593
AtomicRMWLoopOp::Add => ALUOp::Add,
1594
AtomicRMWLoopOp::Sub => ALUOp::Sub,
1595
AtomicRMWLoopOp::And => ALUOp::And,
1596
AtomicRMWLoopOp::Orr => ALUOp::Orr,
1597
AtomicRMWLoopOp::Eor => ALUOp::Eor,
1598
AtomicRMWLoopOp::Nand
1599
| AtomicRMWLoopOp::Umin
1600
| AtomicRMWLoopOp::Umax
1601
| AtomicRMWLoopOp::Smin
1602
| AtomicRMWLoopOp::Smax
1603
| AtomicRMWLoopOp::Xchg => unreachable!(),
1604
};
1605
1606
Inst::AluRRR {
1607
alu_op,
1608
size,
1609
rd: x28wr,
1610
rn: x27,
1611
rm: x26,
1612
}
1613
.emit(sink, emit_info, state);
1614
}
1615
}
1616
1617
if let Some(trap_code) = flags.trap_code() {
1618
sink.add_trap(trap_code);
1619
}
1620
if op == AtomicRMWLoopOp::Xchg {
1621
sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
1622
} else {
1623
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1624
}
1625
1626
// cbnz w24, again
1627
// Note, we're actually testing x24, and relying on the default zero-high-half
1628
// rule in the assignment that `stlxr` does.
1629
let br_offset = sink.cur_offset();
1630
sink.put4(enc_conditional_br(
1631
BranchTarget::Label(again_label),
1632
CondBrKind::NotZero(x24, OperandSize::Size64),
1633
));
1634
sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
1635
}
1636
&Inst::AtomicCAS {
1637
rd,
1638
rs,
1639
rt,
1640
rn,
1641
ty,
1642
flags,
1643
} => {
1644
debug_assert_eq!(rd.to_reg(), rs);
1645
let size = match ty {
1646
I8 => 0b00,
1647
I16 => 0b01,
1648
I32 => 0b10,
1649
I64 => 0b11,
1650
_ => panic!("Unsupported type: {ty}"),
1651
};
1652
1653
if let Some(trap_code) = flags.trap_code() {
1654
sink.add_trap(trap_code);
1655
}
1656
1657
sink.put4(enc_cas(size, rd, rt, rn));
1658
}
1659
&Inst::AtomicCASLoop { ty, flags, .. } => {
1660
/* Emit this:
1661
again:
1662
ldaxr{,b,h} x/w27, [x25]
1663
cmp x27, x/w26 uxt{b,h}
1664
b.ne out
1665
stlxr{,b,h} w24, x/w28, [x25]
1666
cbnz x24, again
1667
out:
1668
1669
Operand conventions:
1670
IN: x25 (addr), x26 (expected value), x28 (replacement value)
1671
OUT: x27 (old value), x24 (trashed)
1672
*/
1673
let x24 = xreg(24);
1674
let x25 = xreg(25);
1675
let x26 = xreg(26);
1676
let x27 = xreg(27);
1677
let x28 = xreg(28);
1678
let xzrwr = writable_zero_reg();
1679
let x24wr = writable_xreg(24);
1680
let x27wr = writable_xreg(27);
1681
let again_label = sink.get_label();
1682
let out_label = sink.get_label();
1683
1684
// again:
1685
sink.bind_label(again_label, &mut state.ctrl_plane);
1686
1687
if let Some(trap_code) = flags.trap_code() {
1688
sink.add_trap(trap_code);
1689
}
1690
1691
// ldaxr x27, [x25]
1692
sink.put4(enc_ldaxr(ty, x27wr, x25));
1693
1694
// The top 32-bits are zero-extended by the ldaxr so we don't
1695
// have to use UXTW, just the x-form of the register.
1696
let (bit21, extend_op) = match ty {
1697
I8 => (0b1, 0b000000),
1698
I16 => (0b1, 0b001000),
1699
_ => (0b0, 0b000000),
1700
};
1701
let bits_31_21 = 0b111_01011_000 | bit21;
1702
// cmp x27, x26 (== subs xzr, x27, x26)
1703
sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));
1704
1705
// b.ne out
1706
let br_out_offset = sink.cur_offset();
1707
sink.put4(enc_conditional_br(
1708
BranchTarget::Label(out_label),
1709
CondBrKind::Cond(Cond::Ne),
1710
));
1711
sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
1712
1713
if let Some(trap_code) = flags.trap_code() {
1714
sink.add_trap(trap_code);
1715
}
1716
1717
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1718
1719
// cbnz w24, again.
1720
// Note, we're actually testing x24, and relying on the default zero-high-half
1721
// rule in the assignment that `stlxr` does.
1722
let br_again_offset = sink.cur_offset();
1723
sink.put4(enc_conditional_br(
1724
BranchTarget::Label(again_label),
1725
CondBrKind::NotZero(x24, OperandSize::Size64),
1726
));
1727
sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
1728
1729
// out:
1730
sink.bind_label(out_label, &mut state.ctrl_plane);
1731
}
1732
&Inst::LoadAcquire {
1733
access_ty,
1734
rt,
1735
rn,
1736
flags,
1737
} => {
1738
if let Some(trap_code) = flags.trap_code() {
1739
sink.add_trap(trap_code);
1740
}
1741
1742
sink.put4(enc_ldar(access_ty, rt, rn));
1743
}
1744
&Inst::StoreRelease {
1745
access_ty,
1746
rt,
1747
rn,
1748
flags,
1749
} => {
1750
if let Some(trap_code) = flags.trap_code() {
1751
sink.add_trap(trap_code);
1752
}
1753
1754
sink.put4(enc_stlr(access_ty, rt, rn));
1755
}
1756
&Inst::Fence {} => {
1757
sink.put4(enc_dmb_ish()); // dmb ish
1758
}
1759
&Inst::Csdb {} => {
1760
sink.put4(0xd503229f);
1761
}
1762
&Inst::FpuMove32 { rd, rn } => {
1763
sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn));
1764
}
1765
&Inst::FpuMove64 { rd, rn } => {
1766
sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
1767
}
1768
&Inst::FpuMove128 { rd, rn } => {
1769
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
1770
}
1771
&Inst::FpuMoveFromVec { rd, rn, idx, size } => {
1772
let (imm5, shift, mask) = match size.lane_size() {
1773
ScalarSize::Size32 => (0b00100, 3, 0b011),
1774
ScalarSize::Size64 => (0b01000, 4, 0b001),
1775
_ => unimplemented!(),
1776
};
1777
debug_assert_eq!(idx & mask, idx);
1778
let imm5 = imm5 | ((idx as u32) << shift);
1779
sink.put4(
1780
0b010_11110000_00000_000001_00000_00000
1781
| (imm5 << 16)
1782
| (machreg_to_vec(rn) << 5)
1783
| machreg_to_vec(rd.to_reg()),
1784
);
1785
}
1786
&Inst::FpuExtend { rd, rn, size } => {
1787
sink.put4(enc_fpurr(
1788
0b000_11110_00_1_000000_10000 | (size.ftype() << 12),
1789
rd,
1790
rn,
1791
));
1792
}
1793
&Inst::FpuRR {
1794
fpu_op,
1795
size,
1796
rd,
1797
rn,
1798
} => {
1799
let top22 = match fpu_op {
1800
FPUOp1::Abs => 0b000_11110_00_1_000001_10000,
1801
FPUOp1::Neg => 0b000_11110_00_1_000010_10000,
1802
FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000,
1803
FPUOp1::Cvt32To64 => {
1804
debug_assert_eq!(size, ScalarSize::Size32);
1805
0b000_11110_00_1_000101_10000
1806
}
1807
FPUOp1::Cvt64To32 => {
1808
debug_assert_eq!(size, ScalarSize::Size64);
1809
0b000_11110_01_1_000100_10000
1810
}
1811
};
1812
let top22 = top22 | size.ftype() << 12;
1813
sink.put4(enc_fpurr(top22, rd, rn));
1814
}
1815
&Inst::FpuRRR {
1816
fpu_op,
1817
size,
1818
rd,
1819
rn,
1820
rm,
1821
} => {
1822
let top22 = match fpu_op {
1823
FPUOp2::Add => 0b000_11110_00_1_00000_001010,
1824
FPUOp2::Sub => 0b000_11110_00_1_00000_001110,
1825
FPUOp2::Mul => 0b000_11110_00_1_00000_000010,
1826
FPUOp2::Div => 0b000_11110_00_1_00000_000110,
1827
FPUOp2::Max => 0b000_11110_00_1_00000_010010,
1828
FPUOp2::Min => 0b000_11110_00_1_00000_010110,
1829
};
1830
let top22 = top22 | size.ftype() << 12;
1831
sink.put4(enc_fpurrr(top22, rd, rn, rm));
1832
}
1833
&Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
1834
FPUOpRI::UShr32(imm) => {
1835
debug_assert_eq!(32, imm.lane_size_in_bits);
1836
sink.put4(
1837
0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
1838
| imm.enc() << 16
1839
| machreg_to_vec(rn) << 5
1840
| machreg_to_vec(rd.to_reg()),
1841
)
1842
}
1843
FPUOpRI::UShr64(imm) => {
1844
debug_assert_eq!(64, imm.lane_size_in_bits);
1845
sink.put4(
1846
0b01_1_111110_0000000_00_0_0_0_1_00000_00000
1847
| imm.enc() << 16
1848
| machreg_to_vec(rn) << 5
1849
| machreg_to_vec(rd.to_reg()),
1850
)
1851
}
1852
},
1853
&Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
1854
debug_assert_eq!(rd.to_reg(), ri);
1855
match fpu_op {
1856
FPUOpRIMod::Sli64(imm) => {
1857
debug_assert_eq!(64, imm.lane_size_in_bits);
1858
sink.put4(
1859
0b01_1_111110_0000000_010101_00000_00000
1860
| imm.enc() << 16
1861
| machreg_to_vec(rn) << 5
1862
| machreg_to_vec(rd.to_reg()),
1863
)
1864
}
1865
FPUOpRIMod::Sli32(imm) => {
1866
debug_assert_eq!(32, imm.lane_size_in_bits);
1867
sink.put4(
1868
0b0_0_1_011110_0000000_010101_00000_00000
1869
| imm.enc() << 16
1870
| machreg_to_vec(rn) << 5
1871
| machreg_to_vec(rd.to_reg()),
1872
)
1873
}
1874
}
1875
}
1876
&Inst::FpuRRRR {
1877
fpu_op,
1878
size,
1879
rd,
1880
rn,
1881
rm,
1882
ra,
1883
} => {
1884
let top17 = match fpu_op {
1885
FPUOp3::MAdd => 0b000_11111_00_0_00000_0,
1886
FPUOp3::MSub => 0b000_11111_00_0_00000_1,
1887
FPUOp3::NMAdd => 0b000_11111_00_1_00000_0,
1888
FPUOp3::NMSub => 0b000_11111_00_1_00000_1,
1889
};
1890
let top17 = top17 | size.ftype() << 7;
1891
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
1892
}
1893
&Inst::VecMisc { op, rd, rn, size } => {
1894
let (q, enc_size) = size.enc_size();
1895
let (u, bits_12_16, size) = match op {
1896
VecMisc2::Not => (0b1, 0b00101, 0b00),
1897
VecMisc2::Neg => (0b1, 0b01011, enc_size),
1898
VecMisc2::Abs => (0b0, 0b01011, enc_size),
1899
VecMisc2::Fabs => {
1900
debug_assert!(
1901
size == VectorSize::Size32x2
1902
|| size == VectorSize::Size32x4
1903
|| size == VectorSize::Size64x2
1904
);
1905
(0b0, 0b01111, enc_size)
1906
}
1907
VecMisc2::Fneg => {
1908
debug_assert!(
1909
size == VectorSize::Size32x2
1910
|| size == VectorSize::Size32x4
1911
|| size == VectorSize::Size64x2
1912
);
1913
(0b1, 0b01111, enc_size)
1914
}
1915
VecMisc2::Fsqrt => {
1916
debug_assert!(
1917
size == VectorSize::Size32x2
1918
|| size == VectorSize::Size32x4
1919
|| size == VectorSize::Size64x2
1920
);
1921
(0b1, 0b11111, enc_size)
1922
}
1923
VecMisc2::Rev16 => {
1924
debug_assert_eq!(size, VectorSize::Size8x16);
1925
(0b0, 0b00001, enc_size)
1926
}
1927
VecMisc2::Rev32 => {
1928
debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8);
1929
(0b1, 0b00000, enc_size)
1930
}
1931
VecMisc2::Rev64 => {
1932
debug_assert!(
1933
size == VectorSize::Size8x16
1934
|| size == VectorSize::Size16x8
1935
|| size == VectorSize::Size32x4
1936
);
1937
(0b0, 0b00000, enc_size)
1938
}
1939
VecMisc2::Fcvtzs => {
1940
debug_assert!(
1941
size == VectorSize::Size32x2
1942
|| size == VectorSize::Size32x4
1943
|| size == VectorSize::Size64x2
1944
);
1945
(0b0, 0b11011, enc_size)
1946
}
1947
VecMisc2::Fcvtzu => {
1948
debug_assert!(
1949
size == VectorSize::Size32x2
1950
|| size == VectorSize::Size32x4
1951
|| size == VectorSize::Size64x2
1952
);
1953
(0b1, 0b11011, enc_size)
1954
}
1955
VecMisc2::Scvtf => {
1956
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1957
(0b0, 0b11101, enc_size & 0b1)
1958
}
1959
VecMisc2::Ucvtf => {
1960
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1961
(0b1, 0b11101, enc_size & 0b1)
1962
}
1963
VecMisc2::Frintn => {
1964
debug_assert!(
1965
size == VectorSize::Size32x2
1966
|| size == VectorSize::Size32x4
1967
|| size == VectorSize::Size64x2
1968
);
1969
(0b0, 0b11000, enc_size & 0b01)
1970
}
1971
VecMisc2::Frintz => {
1972
debug_assert!(
1973
size == VectorSize::Size32x2
1974
|| size == VectorSize::Size32x4
1975
|| size == VectorSize::Size64x2
1976
);
1977
(0b0, 0b11001, enc_size)
1978
}
1979
VecMisc2::Frintm => {
1980
debug_assert!(
1981
size == VectorSize::Size32x2
1982
|| size == VectorSize::Size32x4
1983
|| size == VectorSize::Size64x2
1984
);
1985
(0b0, 0b11001, enc_size & 0b01)
1986
}
1987
VecMisc2::Frintp => {
1988
debug_assert!(
1989
size == VectorSize::Size32x2
1990
|| size == VectorSize::Size32x4
1991
|| size == VectorSize::Size64x2
1992
);
1993
(0b0, 0b11000, enc_size)
1994
}
1995
VecMisc2::Cnt => {
1996
debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
1997
(0b0, 0b00101, enc_size)
1998
}
1999
VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
2000
VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),
2001
VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),
2002
VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),
2003
VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),
2004
VecMisc2::Fcmeq0 => {
2005
debug_assert!(
2006
size == VectorSize::Size32x2
2007
|| size == VectorSize::Size32x4
2008
|| size == VectorSize::Size64x2
2009
);
2010
(0b0, 0b01101, enc_size)
2011
}
2012
VecMisc2::Fcmge0 => {
2013
debug_assert!(
2014
size == VectorSize::Size32x2
2015
|| size == VectorSize::Size32x4
2016
|| size == VectorSize::Size64x2
2017
);
2018
(0b1, 0b01100, enc_size)
2019
}
2020
VecMisc2::Fcmgt0 => {
2021
debug_assert!(
2022
size == VectorSize::Size32x2
2023
|| size == VectorSize::Size32x4
2024
|| size == VectorSize::Size64x2
2025
);
2026
(0b0, 0b01100, enc_size)
2027
}
2028
VecMisc2::Fcmle0 => {
2029
debug_assert!(
2030
size == VectorSize::Size32x2
2031
|| size == VectorSize::Size32x4
2032
|| size == VectorSize::Size64x2
2033
);
2034
(0b1, 0b01101, enc_size)
2035
}
2036
VecMisc2::Fcmlt0 => {
2037
debug_assert!(
2038
size == VectorSize::Size32x2
2039
|| size == VectorSize::Size32x4
2040
|| size == VectorSize::Size64x2
2041
);
2042
(0b0, 0b01110, enc_size)
2043
}
2044
};
2045
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
2046
}
2047
&Inst::VecLanes { op, rd, rn, size } => {
2048
let (q, size) = match size {
2049
VectorSize::Size8x8 => (0b0, 0b00),
2050
VectorSize::Size8x16 => (0b1, 0b00),
2051
VectorSize::Size16x4 => (0b0, 0b01),
2052
VectorSize::Size16x8 => (0b1, 0b01),
2053
VectorSize::Size32x4 => (0b1, 0b10),
2054
_ => unreachable!(),
2055
};
2056
let (u, opcode) = match op {
2057
VecLanesOp::Uminv => (0b1, 0b11010),
2058
VecLanesOp::Addv => (0b0, 0b11011),
2059
};
2060
sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
2061
}
2062
&Inst::VecShiftImm {
2063
op,
2064
rd,
2065
rn,
2066
size,
2067
imm,
2068
} => {
2069
let (is_shr, mut template) = match op {
2070
VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
2071
VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
2072
VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
2073
};
2074
if size.is_128bits() {
2075
template |= 0b1 << 30;
2076
}
2077
let imm = imm as u32;
2078
// Deal with the somewhat strange encoding scheme for, and limits on,
2079
// the shift amount.
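// For example: a 64-bit right shift by 3 encodes immh:immb as
// 0b1000_000 | (64 - 3) = 0b111_1101, while a 32-bit SHL by 5 encodes as
// 0b0100_000 | 5 = 0b010_0101; the position of the leading set bit selects
// the lane size, and the remaining bits carry the (possibly biased) amount.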
2080
let immh_immb = match (size.lane_size(), is_shr) {
2081
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2082
0b_1000_000_u32 | (64 - imm)
2083
}
2084
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2085
0b_0100_000_u32 | (32 - imm)
2086
}
2087
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2088
0b_0010_000_u32 | (16 - imm)
2089
}
2090
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2091
0b_0001_000_u32 | (8 - imm)
2092
}
2093
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2094
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2095
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2096
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2097
_ => panic!(
2098
"aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
2099
),
2100
};
2101
let rn_enc = machreg_to_vec(rn);
2102
let rd_enc = machreg_to_vec(rd.to_reg());
2103
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2104
}
2105
&Inst::VecShiftImmMod {
2106
op,
2107
rd,
2108
ri,
2109
rn,
2110
size,
2111
imm,
2112
} => {
2113
debug_assert_eq!(rd.to_reg(), ri);
2114
let (is_shr, mut template) = match op {
2115
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
2116
};
2117
if size.is_128bits() {
2118
template |= 0b1 << 30;
2119
}
2120
let imm = imm as u32;
2121
// Deal with the somewhat strange encoding scheme for, and limits on,
2122
// the shift amount.
2123
let immh_immb = match (size.lane_size(), is_shr) {
2124
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2125
0b_1000_000_u32 | (64 - imm)
2126
}
2127
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2128
0b_0100_000_u32 | (32 - imm)
2129
}
2130
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2131
0b_0010_000_u32 | (16 - imm)
2132
}
2133
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2134
0b_0001_000_u32 | (8 - imm)
2135
}
2136
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2137
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2138
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2139
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2140
_ => panic!(
2141
"aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
2142
),
2143
};
2144
let rn_enc = machreg_to_vec(rn);
2145
let rd_enc = machreg_to_vec(rd.to_reg());
2146
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2147
}
2148
&Inst::VecExtract { rd, rn, rm, imm4 } => {
2149
if imm4 < 16 {
2150
let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
2151
let rm_enc = machreg_to_vec(rm);
2152
let rn_enc = machreg_to_vec(rn);
2153
let rd_enc = machreg_to_vec(rd.to_reg());
2154
sink.put4(
2155
template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
2156
);
2157
} else {
2158
panic!("aarch64: Inst::VecExtract: emit: invalid extract index {imm4}");
2159
}
2160
}
2161
&Inst::VecTbl { rd, rn, rm } => {
2162
sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));
2163
}
2164
&Inst::VecTblExt { rd, ri, rn, rm } => {
2165
debug_assert_eq!(rd.to_reg(), ri);
2166
sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));
2167
}
2168
&Inst::VecTbl2 { rd, rn, rn2, rm } => {
2169
assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2170
sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));
2171
}
2172
&Inst::VecTbl2Ext {
2173
rd,
2174
ri,
2175
rn,
2176
rn2,
2177
rm,
2178
} => {
2179
debug_assert_eq!(rd.to_reg(), ri);
2180
assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2181
sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));
2182
}
2183
&Inst::FpuCmp { size, rn, rm } => {
2184
sink.put4(enc_fcmp(size, rn, rm));
2185
}
2186
&Inst::FpuToInt { op, rd, rn } => {
2187
let top16 = match op {
2188
// FCVTZS (32/32-bit)
2189
FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
2190
// FCVTZU (32/32-bit)
2191
FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
2192
// FCVTZS (32/64-bit)
2193
FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
2194
// FCVTZU (32/64-bit)
2195
FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
2196
// FCVTZS (64/32-bit)
2197
FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
2198
// FCVTZU (64/32-bit)
2199
FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
2200
// FCVTZS (64/64-bit)
2201
FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
2202
// FCVTZU (64/64-bit)
2203
FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
2204
};
2205
sink.put4(enc_fputoint(top16, rd, rn));
2206
}
2207
&Inst::IntToFpu { op, rd, rn } => {
2208
let top16 = match op {
2209
// SCVTF (32/32-bit)
2210
IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
2211
// UCVTF (32/32-bit)
2212
IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
2213
// SCVTF (64/32-bit)
2214
IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
2215
// UCVTF (64/32-bit)
2216
IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
2217
// SCVTF (32/64-bit)
2218
IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
2219
// UCVTF (32/64-bit)
2220
IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
2221
// SCVTF (64/64-bit)
2222
IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
2223
// UCVTF (64/64-bit)
2224
IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
2225
};
2226
sink.put4(enc_inttofpu(top16, rd, rn));
2227
}
2228
&Inst::FpuCSel16 { rd, rn, rm, cond } => {
2229
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16));
2230
}
2231
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
2232
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
2233
}
2234
&Inst::FpuCSel64 { rd, rn, rm, cond } => {
2235
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
2236
}
2237
&Inst::FpuRound { op, rd, rn } => {
2238
let top22 = match op {
2239
FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
2240
FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
2241
FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
2242
FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
2243
FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
2244
FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
2245
FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
2246
FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
2247
};
2248
sink.put4(enc_fround(top22, rd, rn));
2249
}
2250
&Inst::MovToFpu { rd, rn, size } => {
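// FMOV (general): moves the W/X source into the low half/single/double lane
// and zeroes the remaining bits of the vector register.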
2251
let template = match size {
2252
ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000,
2253
ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
2254
ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
2255
_ => unreachable!(),
2256
};
2257
sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
2258
}
2259
&Inst::FpuMoveFPImm { rd, imm, size } => {
2260
sink.put4(
2261
0b000_11110_00_1_00_000_000100_00000_00000
2262
| size.ftype() << 22
2263
| ((imm.enc_bits() as u32) << 13)
2264
| machreg_to_vec(rd.to_reg()),
2265
);
2266
}
2267
&Inst::MovToVec {
2268
rd,
2269
ri,
2270
rn,
2271
idx,
2272
size,
2273
} => {
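// INS (general): inserts the general-purpose source into the selected lane
// while preserving all other lanes, hence `rd` being tied to `ri`.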
2274
debug_assert_eq!(rd.to_reg(), ri);
2275
let (imm5, shift) = match size.lane_size() {
2276
ScalarSize::Size8 => (0b00001, 1),
2277
ScalarSize::Size16 => (0b00010, 2),
2278
ScalarSize::Size32 => (0b00100, 3),
2279
ScalarSize::Size64 => (0b01000, 4),
2280
_ => unreachable!(),
2281
};
2282
debug_assert_eq!(idx & (0b11111 >> shift), idx);
2283
let imm5 = imm5 | ((idx as u32) << shift);
2284
sink.put4(
2285
0b010_01110000_00000_0_0011_1_00000_00000
2286
| (imm5 << 16)
2287
| (machreg_to_gpr(rn) << 5)
2288
| machreg_to_vec(rd.to_reg()),
2289
);
2290
}
2291
&Inst::MovFromVec { rd, rn, idx, size } => {
2292
let (q, imm5, shift, mask) = match size {
2293
ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111),
2294
ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111),
2295
ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011),
2296
ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001),
2297
_ => panic!("Unexpected scalar FP operand size: {size:?}"),
2298
};
2299
debug_assert_eq!(idx & mask, idx);
2300
let imm5 = imm5 | ((idx as u32) << shift);
2301
sink.put4(
2302
0b000_01110000_00000_0_0111_1_00000_00000
2303
| (q << 30)
2304
| (imm5 << 16)
2305
| (machreg_to_vec(rn) << 5)
2306
| machreg_to_gpr(rd.to_reg()),
2307
);
2308
}
2309
&Inst::MovFromVecSigned {
2310
rd,
2311
rn,
2312
idx,
2313
size,
2314
scalar_size,
2315
} => {
2316
let (imm5, shift, half) = match size {
2317
VectorSize::Size8x8 => (0b00001, 1, true),
2318
VectorSize::Size8x16 => (0b00001, 1, false),
2319
VectorSize::Size16x4 => (0b00010, 2, true),
2320
VectorSize::Size16x8 => (0b00010, 2, false),
2321
VectorSize::Size32x2 => {
2322
debug_assert_ne!(scalar_size, OperandSize::Size32);
2323
(0b00100, 3, true)
2324
}
2325
VectorSize::Size32x4 => {
2326
debug_assert_ne!(scalar_size, OperandSize::Size32);
2327
(0b00100, 3, false)
2328
}
2329
_ => panic!("Unexpected vector operand size"),
2330
};
2331
debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
2332
let imm5 = imm5 | ((idx as u32) << shift);
2333
sink.put4(
2334
0b000_01110000_00000_0_0101_1_00000_00000
2335
| (scalar_size.is64() as u32) << 30
2336
| (imm5 << 16)
2337
| (machreg_to_vec(rn) << 5)
2338
| machreg_to_gpr(rd.to_reg()),
2339
);
2340
}
2341
&Inst::VecDup { rd, rn, size } => {
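// DUP (general): broadcasts the general-purpose source register into every
// lane of the destination vector.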
2342
let q = size.is_128bits() as u32;
2343
let imm5 = match size.lane_size() {
2344
ScalarSize::Size8 => 0b00001,
2345
ScalarSize::Size16 => 0b00010,
2346
ScalarSize::Size32 => 0b00100,
2347
ScalarSize::Size64 => 0b01000,
2348
_ => unreachable!(),
2349
};
2350
sink.put4(
2351
0b0_0_0_01110000_00000_000011_00000_00000
2352
| (q << 30)
2353
| (imm5 << 16)
2354
| (machreg_to_gpr(rn) << 5)
2355
| machreg_to_vec(rd.to_reg()),
2356
);
2357
}
2358
&Inst::VecDupFromFpu { rd, rn, size, lane } => {
2359
let q = size.is_128bits() as u32;
2360
let imm5 = match size.lane_size() {
2361
ScalarSize::Size8 => {
2362
assert!(lane < 16);
2363
0b00001 | (u32::from(lane) << 1)
2364
}
2365
ScalarSize::Size16 => {
2366
assert!(lane < 8);
2367
0b00010 | (u32::from(lane) << 2)
2368
}
2369
ScalarSize::Size32 => {
2370
assert!(lane < 4);
2371
0b00100 | (u32::from(lane) << 3)
2372
}
2373
ScalarSize::Size64 => {
2374
assert!(lane < 2);
2375
0b01000 | (u32::from(lane) << 4)
2376
}
2377
_ => unimplemented!(),
2378
};
2379
sink.put4(
2380
0b000_01110000_00000_000001_00000_00000
2381
| (q << 30)
2382
| (imm5 << 16)
2383
| (machreg_to_vec(rn) << 5)
2384
| machreg_to_vec(rd.to_reg()),
2385
);
2386
}
2387
&Inst::VecDupFPImm { rd, imm, size } => {
2388
let imm = imm.enc_bits();
2389
let op = match size.lane_size() {
2390
ScalarSize::Size32 => 0,
2391
ScalarSize::Size64 => 1,
2392
_ => unimplemented!(),
2393
};
2394
let q_op = op | ((size.is_128bits() as u32) << 1);
2395
2396
sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
2397
}
2398
&Inst::VecDupImm {
2399
rd,
2400
imm,
2401
invert,
2402
size,
2403
} => {
2404
let (imm, shift, shift_ones) = imm.value();
2405
let (op, cmode) = match size.lane_size() {
2406
ScalarSize::Size8 => {
2407
assert!(!invert);
2408
assert_eq!(shift, 0);
2409
2410
(0, 0b1110)
2411
}
2412
ScalarSize::Size16 => {
2413
let s = shift & 8;
2414
2415
assert!(!shift_ones);
2416
assert_eq!(s, shift);
2417
2418
(invert as u32, 0b1000 | (s >> 2))
2419
}
2420
ScalarSize::Size32 => {
2421
if shift_ones {
2422
assert!(shift == 8 || shift == 16);
2423
2424
(invert as u32, 0b1100 | (shift >> 4))
2425
} else {
2426
let s = shift & 24;
2427
2428
assert_eq!(s, shift);
2429
2430
(invert as u32, 0b0000 | (s >> 2))
2431
}
2432
}
2433
ScalarSize::Size64 => {
2434
assert!(!invert);
2435
assert_eq!(shift, 0);
2436
2437
(1, 0b1110)
2438
}
2439
_ => unreachable!(),
2440
};
2441
let q_op = op | ((size.is_128bits() as u32) << 1);
2442
2443
sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
2444
}
2445
&Inst::VecExtend {
2446
t,
2447
rd,
2448
rn,
2449
high_half,
2450
lane_size,
2451
} => {
2452
let immh = match lane_size {
2453
ScalarSize::Size16 => 0b001,
2454
ScalarSize::Size32 => 0b010,
2455
ScalarSize::Size64 => 0b100,
2456
_ => panic!("Unexpected VecExtend to lane size of {lane_size:?}"),
2457
};
2458
let u = match t {
2459
VecExtendOp::Sxtl => 0b0,
2460
VecExtendOp::Uxtl => 0b1,
2461
};
2462
sink.put4(
2463
0b000_011110_0000_000_101001_00000_00000
2464
| ((high_half as u32) << 30)
2465
| (u << 29)
2466
| (immh << 19)
2467
| (machreg_to_vec(rn) << 5)
2468
| machreg_to_vec(rd.to_reg()),
2469
);
2470
}
2471
&Inst::VecRRLong {
2472
op,
2473
rd,
2474
rn,
2475
high_half,
2476
} => {
2477
let (u, size, bits_12_16) = match op {
2478
VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
2479
VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
2480
VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
2481
VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
2482
VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
2483
};
2484
2485
sink.put4(enc_vec_rr_misc(
2486
((high_half as u32) << 1) | u,
2487
size,
2488
bits_12_16,
2489
rd,
2490
rn,
2491
));
2492
}
2493
&Inst::VecRRNarrowLow {
2494
op,
2495
rd,
2496
rn,
2497
lane_size,
2498
}
2499
| &Inst::VecRRNarrowHigh {
2500
op,
2501
rd,
2502
rn,
2503
lane_size,
2504
..
2505
} => {
2506
let high_half = match self {
2507
&Inst::VecRRNarrowLow { .. } => false,
2508
&Inst::VecRRNarrowHigh { .. } => true,
2509
_ => unreachable!(),
2510
};
2511
2512
let size = match lane_size {
2513
ScalarSize::Size8 => 0b00,
2514
ScalarSize::Size16 => 0b01,
2515
ScalarSize::Size32 => 0b10,
2516
_ => panic!("unsupported size: {lane_size:?}"),
2517
};
2518
2519
// Floats use a single bit, to encode either half or single.
2520
let size = match op {
2521
VecRRNarrowOp::Fcvtn => size >> 1,
2522
_ => size,
2523
};
2524
2525
let (u, bits_12_16) = match op {
2526
VecRRNarrowOp::Xtn => (0b0, 0b10010),
2527
VecRRNarrowOp::Sqxtn => (0b0, 0b10100),
2528
VecRRNarrowOp::Sqxtun => (0b1, 0b10010),
2529
VecRRNarrowOp::Uqxtn => (0b1, 0b10100),
2530
VecRRNarrowOp::Fcvtn => (0b0, 0b10110),
2531
};
2532
2533
sink.put4(enc_vec_rr_misc(
2534
((high_half as u32) << 1) | u,
2535
size,
2536
bits_12_16,
2537
rd,
2538
rn,
2539
));
2540
}
2541
&Inst::VecMovElement {
2542
rd,
2543
ri,
2544
rn,
2545
dest_idx,
2546
src_idx,
2547
size,
2548
} => {
2549
debug_assert_eq!(rd.to_reg(), ri);
2550
let (imm5, shift) = match size.lane_size() {
2551
ScalarSize::Size8 => (0b00001, 1),
2552
ScalarSize::Size16 => (0b00010, 2),
2553
ScalarSize::Size32 => (0b00100, 3),
2554
ScalarSize::Size64 => (0b01000, 4),
2555
_ => unreachable!(),
2556
};
2557
let mask = 0b11111 >> shift;
2558
debug_assert_eq!(dest_idx & mask, dest_idx);
2559
debug_assert_eq!(src_idx & mask, src_idx);
2560
let imm4 = (src_idx as u32) << (shift - 1);
2561
let imm5 = imm5 | ((dest_idx as u32) << shift);
2562
sink.put4(
2563
0b011_01110000_00000_0_0000_1_00000_00000
2564
| (imm5 << 16)
2565
| (imm4 << 11)
2566
| (machreg_to_vec(rn) << 5)
2567
| machreg_to_vec(rd.to_reg()),
2568
);
2569
}
2570
&Inst::VecRRPair { op, rd, rn } => {
2571
let bits_12_16 = match op {
2572
VecPairOp::Addp => 0b11011,
2573
};
2574
2575
sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
2576
}
2577
&Inst::VecRRRLong {
2578
rd,
2579
rn,
2580
rm,
2581
alu_op,
2582
high_half,
2583
} => {
2584
let (u, size, bit14) = match alu_op {
2585
VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
2586
VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
2587
VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
2588
VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
2589
VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
2590
VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
2591
};
2592
sink.put4(enc_vec_rrr_long(
2593
high_half as u32,
2594
u,
2595
size,
2596
bit14,
2597
rm,
2598
rn,
2599
rd,
2600
));
2601
}
2602
&Inst::VecRRRLongMod {
2603
rd,
2604
ri,
2605
rn,
2606
rm,
2607
alu_op,
2608
high_half,
2609
} => {
2610
debug_assert_eq!(rd.to_reg(), ri);
2611
let (u, size, bit14) = match alu_op {
2612
VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),
2613
VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),
2614
VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),
2615
};
2616
sink.put4(enc_vec_rrr_long(
2617
high_half as u32,
2618
u,
2619
size,
2620
bit14,
2621
rm,
2622
rn,
2623
rd,
2624
));
2625
}
2626
&Inst::VecRRPairLong { op, rd, rn } => {
2627
let (u, size) = match op {
2628
VecRRPairLongOp::Saddlp8 => (0b0, 0b0),
2629
VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),
2630
VecRRPairLongOp::Saddlp16 => (0b0, 0b1),
2631
VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),
2632
};
2633
2634
sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));
2635
}
2636
&Inst::VecRRR {
2637
rd,
2638
rn,
2639
rm,
2640
alu_op,
2641
size,
2642
} => {
2643
let (q, enc_size) = size.enc_size();
2644
let is_float = match alu_op {
2645
VecALUOp::Fcmeq
2646
| VecALUOp::Fcmgt
2647
| VecALUOp::Fcmge
2648
| VecALUOp::Fadd
2649
| VecALUOp::Fsub
2650
| VecALUOp::Fdiv
2651
| VecALUOp::Fmax
2652
| VecALUOp::Fmin
2653
| VecALUOp::Fmul => true,
2654
_ => false,
2655
};
2656
2657
let (top11, bit15_10) = match alu_op {
2658
VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
2659
VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
2660
VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
2661
VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
2662
VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
2663
VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
2664
VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
2665
VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
2666
VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
2667
VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
2668
VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
2669
VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
2670
// The following logical instructions operate on bytes, so are not encoded differently
2671
// for the different vector types.
2672
VecALUOp::And => (0b000_01110_00_1, 0b000111),
2673
VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
2674
VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
2675
VecALUOp::Orn => (0b000_01110_11_1, 0b000111),
2676
VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
2677
VecALUOp::Umaxp => {
2678
debug_assert_ne!(size, VectorSize::Size64x2);
2679
2680
(0b001_01110_00_1 | enc_size << 1, 0b101001)
2681
}
2682
VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
2683
VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
2684
VecALUOp::Mul => {
2685
debug_assert_ne!(size, VectorSize::Size64x2);
2686
(0b000_01110_00_1 | enc_size << 1, 0b100111)
2687
}
2688
VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
2689
VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
2690
VecALUOp::Umin => {
2691
debug_assert_ne!(size, VectorSize::Size64x2);
2692
2693
(0b001_01110_00_1 | enc_size << 1, 0b011011)
2694
}
2695
VecALUOp::Smin => {
2696
debug_assert_ne!(size, VectorSize::Size64x2);
2697
2698
(0b000_01110_00_1 | enc_size << 1, 0b011011)
2699
}
2700
VecALUOp::Umax => {
2701
debug_assert_ne!(size, VectorSize::Size64x2);
2702
2703
(0b001_01110_00_1 | enc_size << 1, 0b011001)
2704
}
2705
VecALUOp::Smax => {
2706
debug_assert_ne!(size, VectorSize::Size64x2);
2707
2708
(0b000_01110_00_1 | enc_size << 1, 0b011001)
2709
}
2710
VecALUOp::Urhadd => {
2711
debug_assert_ne!(size, VectorSize::Size64x2);
2712
2713
(0b001_01110_00_1 | enc_size << 1, 0b000101)
2714
}
2715
VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
2716
VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
2717
VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
2718
VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
2719
VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
2720
VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
2721
VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
2722
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
2723
VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110),
2724
VecALUOp::Sqrdmulh => {
2725
debug_assert!(
2726
size.lane_size() == ScalarSize::Size16
2727
|| size.lane_size() == ScalarSize::Size32
2728
);
2729
2730
(0b001_01110_00_1 | enc_size << 1, 0b101101)
2731
}
2732
VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110),
2733
VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110),
2734
VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010),
2735
VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010),
2736
};
2737
let top11 = if is_float {
2738
top11 | size.enc_float_size() << 1
2739
} else {
2740
top11
2741
};
2742
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
2743
}
2744
&Inst::VecRRRMod {
2745
rd,
2746
ri,
2747
rn,
2748
rm,
2749
alu_op,
2750
size,
2751
} => {
2752
debug_assert_eq!(rd.to_reg(), ri);
2753
let (q, _enc_size) = size.enc_size();
2754
2755
let (top11, bit15_10) = match alu_op {
2756
VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),
2757
VecALUModOp::Fmla => {
2758
(0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
2759
}
2760
VecALUModOp::Fmls => {
2761
(0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011)
2762
}
2763
};
2764
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
2765
}
2766
&Inst::VecFmlaElem {
2767
rd,
2768
ri,
2769
rn,
2770
rm,
2771
alu_op,
2772
size,
2773
idx,
2774
} => {
2775
debug_assert_eq!(rd.to_reg(), ri);
2776
let idx = u32::from(idx);
2777
2778
let (q, _size) = size.enc_size();
2779
let o2 = match alu_op {
2780
VecALUModOp::Fmla => 0b0,
2781
VecALUModOp::Fmls => 0b1,
2782
_ => unreachable!(),
2783
};
2784
2785
let (h, l) = match size {
2786
VectorSize::Size32x4 => {
2787
assert!(idx < 4);
2788
(idx >> 1, idx & 1)
2789
}
2790
VectorSize::Size64x2 => {
2791
assert!(idx < 2);
2792
(idx, 0)
2793
}
2794
_ => unreachable!(),
2795
};
2796
2797
let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l;
2798
let bit15_10 = 0b000100 | (o2 << 4) | (h << 1);
2799
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
2800
}
2801
&Inst::VecLoadReplicate {
2802
rd,
2803
rn,
2804
size,
2805
flags,
2806
} => {
2807
let (q, size) = size.enc_size();
2808
2809
if let Some(trap_code) = flags.trap_code() {
2810
// Register the offset at which the actual load instruction starts.
2811
sink.add_trap(trap_code);
2812
}
2813
2814
sink.put4(enc_ldst_vec(q, size, rn, rd));
2815
}
2816
&Inst::VecCSel { rd, rn, rm, cond } => {
2817
/* Emit this:
2818
b.cond else
2819
mov rd, rm
2820
b out
2821
else:
2822
mov rd, rn
2823
out:
2824
2825
Note, we could do better in the cases where rd == rn or rd == rm.
2826
*/
2827
let else_label = sink.get_label();
2828
let out_label = sink.get_label();
2829
2830
// b.cond else
2831
let br_else_offset = sink.cur_offset();
2832
sink.put4(enc_conditional_br(
2833
BranchTarget::Label(else_label),
2834
CondBrKind::Cond(cond),
2835
));
2836
sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);
2837
2838
// mov rd, rm
2839
sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));
2840
2841
// b out
2842
let b_out_offset = sink.cur_offset();
2843
sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
2844
sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
2845
sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));
2846
2847
// else:
2848
sink.bind_label(else_label, &mut state.ctrl_plane);
2849
2850
// mov rd, rn
2851
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
2852
2853
// out:
2854
sink.bind_label(out_label, &mut state.ctrl_plane);
2855
}
2856
&Inst::MovToNZCV { rn } => {
2857
sink.put4(0xd51b4200 | machreg_to_gpr(rn));
2858
}
2859
&Inst::MovFromNZCV { rd } => {
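// MRS Xd, NZCV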
2860
sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
2861
}
2862
&Inst::Extend {
2863
rd,
2864
rn,
2865
signed: false,
2866
from_bits: 1,
2867
to_bits,
2868
} => {
2869
assert!(to_bits <= 64);
2870
// Reduce zero-extend-from-1-bit to:
2871
// - and rd, rn, #1
2872
// Note: This is special cased as UBFX may take more cycles
2873
// than AND on smaller cores.
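// E.g. (with w0 as an illustrative register) the emitted instruction is
// `and w0, w0, #1`; since the 32-bit write also clears the upper 32 bits,
// the result is already correct for any destination width up to 64 bits.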
2874
let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
2875
Inst::AluRRImmLogic {
2876
alu_op: ALUOp::And,
2877
size: OperandSize::Size32,
2878
rd,
2879
rn,
2880
imml,
2881
}
2882
.emit(sink, emit_info, state);
2883
}
2884
&Inst::Extend {
2885
rd,
2886
rn,
2887
signed: false,
2888
from_bits: 32,
2889
to_bits: 64,
2890
} => {
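// A plain 32-bit register move suffices here: `mov wd, wm` (an alias of
// `orr wd, wzr, wm`) zeroes the upper 32 bits of the destination, which is
// exactly the required zero-extension.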
2891
let mov = Inst::Mov {
2892
size: OperandSize::Size32,
2893
rd,
2894
rm: rn,
2895
};
2896
mov.emit(sink, emit_info, state);
2897
}
2898
&Inst::Extend {
2899
rd,
2900
rn,
2901
signed,
2902
from_bits,
2903
to_bits,
2904
} => {
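// The general case lowers to a single bitfield-move with immr = 0 and
// imms = from_bits - 1: SBFM when signed (the sxtb/sxth/sxtw aliases) and
// UBFM when unsigned (uxtb/uxth), the latter always in the 32-bit form
// because writing a W register already zeroes the upper 32 bits.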
2905
let (opc, size) = if signed {
2906
(0b00, OperandSize::from_bits(to_bits))
2907
} else {
2908
(0b10, OperandSize::Size32)
2909
};
2910
sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
2911
}
2912
&Inst::Jump { ref dest } => {
2913
let off = sink.cur_offset();
2914
// If the jump target is a label, record the use so that a fixup can occur later.
2915
if let Some(l) = dest.as_label() {
2916
sink.use_label_at_offset(off, l, LabelUse::Branch26);
2917
sink.add_uncond_branch(off, off + 4, l);
2918
}
2919
// Emit the jump itself.
2920
sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
2921
}
2922
&Inst::Args { .. } | &Inst::Rets { .. } => {
2923
// Nothing: this is a pseudoinstruction that serves
2924
// only to constrain registers at a certain point.
2925
}
2926
&Inst::Ret {} => {
2927
sink.put4(0xd65f03c0);
2928
}
2929
&Inst::AuthenticatedRet { key, is_hint } => {
2930
let (op2, is_hint) = match key {
2931
APIKey::AZ => (0b100, true),
2932
APIKey::ASP => (0b101, is_hint),
2933
APIKey::BZ => (0b110, true),
2934
APIKey::BSP => (0b111, is_hint),
2935
};
2936
2937
if is_hint {
2938
sink.put4(key.enc_auti_hint());
2939
Inst::Ret {}.emit(sink, emit_info, state);
2940
} else {
2941
sink.put4(0xd65f0bff | (op2 << 9)); // reta{key}
2942
}
2943
}
2944
&Inst::Call { ref info } => {
2945
let start = sink.cur_offset();
2946
let user_stack_map = state.take_stack_map();
2947
sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
2948
sink.put4(enc_jump26(0b100101, 0));
2949
if let Some(s) = user_stack_map {
2950
let offset = sink.cur_offset();
2951
sink.push_user_stack_map(state, offset, s);
2952
}
2953
2954
if let Some(try_call) = info.try_call_info.as_ref() {
2955
sink.add_try_call_site(
2956
Some(state.frame_layout.sp_to_fp()),
2957
try_call.exception_handlers(&state.frame_layout),
2958
);
2959
} else {
2960
sink.add_call_site();
2961
}
2962
2963
if info.callee_pop_size > 0 {
2964
let callee_pop_size =
2965
i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
2966
for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
2967
inst.emit(sink, emit_info, state);
2968
}
2969
}
2970
2971
if info.patchable {
2972
sink.add_patchable_call_site(sink.cur_offset() - start);
2973
} else {
2974
// Load any stack-carried return values.
2975
info.emit_retval_loads::<AArch64MachineDeps, _, _>(
2976
state.frame_layout().stackslots_size,
2977
|inst| inst.emit(sink, emit_info, state),
2978
|needed_space| Some(Inst::EmitIsland { needed_space }),
2979
);
2980
}
2981
2982
// If this is a try-call, jump to the continuation
2983
// (normal-return) block.
2984
if let Some(try_call) = info.try_call_info.as_ref() {
2985
let jmp = Inst::Jump {
2986
dest: BranchTarget::Label(try_call.continuation),
2987
};
2988
jmp.emit(sink, emit_info, state);
2989
}
2990
2991
// We produce an island above if needed, so disable
2992
// the worst-case-size check in this case.
2993
start_off = sink.cur_offset();
2994
}
2995
&Inst::CallInd { ref info } => {
2996
let user_stack_map = state.take_stack_map();
2997
sink.put4(
2998
0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.dest) << 5),
2999
);
3000
if let Some(s) = user_stack_map {
3001
let offset = sink.cur_offset();
3002
sink.push_user_stack_map(state, offset, s);
3003
}
3004
3005
if let Some(try_call) = info.try_call_info.as_ref() {
3006
sink.add_try_call_site(
3007
Some(state.frame_layout.sp_to_fp()),
3008
try_call.exception_handlers(&state.frame_layout),
3009
);
3010
} else {
3011
sink.add_call_site();
3012
}
3013
3014
if info.callee_pop_size > 0 {
3015
let callee_pop_size =
3016
i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
3017
for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
3018
inst.emit(sink, emit_info, state);
3019
}
3020
}
3021
3022
// Load any stack-carried return values.
3023
info.emit_retval_loads::<AArch64MachineDeps, _, _>(
3024
state.frame_layout().stackslots_size,
3025
|inst| inst.emit(sink, emit_info, state),
3026
|needed_space| Some(Inst::EmitIsland { needed_space }),
3027
);
3028
3029
// If this is a try-call, jump to the continuation
3030
// (normal-return) block.
3031
if let Some(try_call) = info.try_call_info.as_ref() {
3032
let jmp = Inst::Jump {
3033
dest: BranchTarget::Label(try_call.continuation),
3034
};
3035
jmp.emit(sink, emit_info, state);
3036
}
3037
3038
// We produce an island above if needed, so disable
3039
// the worst-case-size check in this case.
3040
start_off = sink.cur_offset();
3041
}
3042
&Inst::ReturnCall { ref info } => {
3043
emit_return_call_common_sequence(sink, emit_info, state, info);
3044
3045
// Note: this is not `Inst::Jump { .. }.emit(..)` because we
3046
// have different metadata in this case: we don't have a label
3047
// for the target, but rather a function relocation.
3048
sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
3049
sink.put4(enc_jump26(0b000101, 0));
3050
sink.add_call_site();
3051
3052
// `emit_return_call_common_sequence` emits an island if
3053
// necessary, so we can safely disable the worst-case-size check
3054
// in this case.
3055
start_off = sink.cur_offset();
3056
}
3057
&Inst::ReturnCallInd { ref info } => {
3058
emit_return_call_common_sequence(sink, emit_info, state, info);
3059
3060
Inst::IndirectBr {
3061
rn: info.dest,
3062
targets: vec![],
3063
}
3064
.emit(sink, emit_info, state);
3065
sink.add_call_site();
3066
3067
// `emit_return_call_common_sequence` emits an island if
3068
// necessary, so we can safely disable the worst-case-size check
3069
// in this case.
3070
start_off = sink.cur_offset();
3071
}
3072
&Inst::CondBr {
3073
taken,
3074
not_taken,
3075
kind,
3076
} => {
3077
// Conditional part first.
3078
let cond_off = sink.cur_offset();
3079
if let Some(l) = taken.as_label() {
3080
sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
3081
let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
3082
sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
3083
}
3084
sink.put4(enc_conditional_br(taken, kind));
3085
3086
// Unconditional part next.
3087
let uncond_off = sink.cur_offset();
3088
if let Some(l) = not_taken.as_label() {
3089
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
3090
sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
3091
}
3092
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
3093
}
3094
&Inst::TestBitAndBranch {
3095
taken,
3096
not_taken,
3097
kind,
3098
rn,
3099
bit,
3100
} => {
3101
// Emit the conditional branch first
3102
let cond_off = sink.cur_offset();
3103
if let Some(l) = taken.as_label() {
3104
sink.use_label_at_offset(cond_off, l, LabelUse::Branch14);
3105
let inverted =
3106
enc_test_bit_and_branch(kind.complement(), taken, rn, bit).to_le_bytes();
3107
sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
3108
}
3109
sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit));
3110
3111
// Unconditional part next.
3112
let uncond_off = sink.cur_offset();
3113
if let Some(l) = not_taken.as_label() {
3114
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
3115
sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
3116
}
3117
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
3118
}
3119
&Inst::TrapIf { kind, trap_code } => {
3120
let label = sink.defer_trap(trap_code);
3121
// condbr KIND, LABEL
3122
let off = sink.cur_offset();
3123
sink.put4(enc_conditional_br(BranchTarget::Label(label), kind));
3124
sink.use_label_at_offset(off, label, LabelUse::Branch19);
3125
}
3126
&Inst::IndirectBr { rn, .. } => {
3127
sink.put4(enc_br(rn));
3128
}
3129
&Inst::Nop0 => {}
3130
&Inst::Nop4 => {
3131
sink.put4(0xd503201f);
3132
}
3133
&Inst::Brk => {
3134
sink.put4(0xd43e0000);
3135
}
3136
&Inst::Udf { trap_code } => {
3137
sink.add_trap(trap_code);
3138
sink.put_data(Inst::TRAP_OPCODE);
3139
}
3140
&Inst::Adr { rd, off } => {
3141
assert!(off > -(1 << 20));
3142
assert!(off < (1 << 20));
3143
sink.put4(enc_adr(off, rd));
3144
}
3145
&Inst::Adrp { rd, off } => {
3146
assert!(off > -(1 << 20));
3147
assert!(off < (1 << 20));
3148
sink.put4(enc_adrp(off, rd));
3149
}
3150
&Inst::Word4 { data } => {
3151
sink.put4(data);
3152
}
3153
&Inst::Word8 { data } => {
3154
sink.put8(data);
3155
}
3156
&Inst::JTSequence {
3157
ridx,
3158
rtmp1,
3159
rtmp2,
3160
default,
3161
ref targets,
3162
..
3163
} => {
3164
// This sequence is *one* instruction in the vcode, and is expanded only here at
3165
// emission time, because we cannot allow the regalloc to insert spills/reloads in
3166
// the middle; we depend on hardcoded PC-rel addressing below.
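// Roughly, the sequence emitted below is (sketch; register names as in the
// vcode operands above):
//   b.hs   <default>                       ; prior bounds check set HS when out of range
//   csel   rtmp2, xzr, ridx, hs            ; Spectre guard: force index to 0 on misspeculation
//   csdb
//   adr    rtmp1, <jump table>             ; the table starts 16 bytes past this adr
//   ldrsw  rtmp2, [rtmp1, rtmp2, uxtw #2]  ; load the 32-bit offset for the chosen target
//   add    rtmp1, rtmp1, rtmp2
//   br     rtmp1
//   <one 32-bit offset per target, relative to the start of the table>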
3167
3168
// Branch to default when condition code from prior comparison indicates.
3169
let br =
3170
enc_conditional_br(BranchTarget::Label(default), CondBrKind::Cond(Cond::Hs));
3171
3172
// No need to inform the sink's branch folding logic about this branch, because it
3173
// will not be merged with any other branch, flipped, or elided (it is not preceded
3174
// or succeeded by any other branch). Just emit it with the label use.
3175
let default_br_offset = sink.cur_offset();
3176
sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19);
3177
sink.put4(br);
3178
3179
// Overwrite the index with a zero when the above
3180
// branch misspeculates (Spectre mitigation). Save the
3181
// resulting index in rtmp2.
3182
let inst = Inst::CSel {
3183
rd: rtmp2,
3184
cond: Cond::Hs,
3185
rn: zero_reg(),
3186
rm: ridx,
3187
};
3188
inst.emit(sink, emit_info, state);
3189
// Prevent any data value speculation.
3190
Inst::Csdb.emit(sink, emit_info, state);
3191
3192
// Load address of jump table
3193
let inst = Inst::Adr { rd: rtmp1, off: 16 };
3194
inst.emit(sink, emit_info, state);
3195
// Load value out of jump table
3196
let inst = Inst::SLoad32 {
3197
rd: rtmp2,
3198
mem: AMode::reg_plus_reg_scaled_extended(
3199
rtmp1.to_reg(),
3200
rtmp2.to_reg(),
3201
ExtendOp::UXTW,
3202
),
3203
flags: MemFlags::trusted(),
3204
};
3205
inst.emit(sink, emit_info, state);
3206
// Add base of jump table to jump-table-sourced block offset
3207
let inst = Inst::AluRRR {
3208
alu_op: ALUOp::Add,
3209
size: OperandSize::Size64,
3210
rd: rtmp1,
3211
rn: rtmp1.to_reg(),
3212
rm: rtmp2.to_reg(),
3213
};
3214
inst.emit(sink, emit_info, state);
3215
// Branch to computed address. (`targets` here is only used for successor queries
3216
// and is not needed for emission.)
3217
let inst = Inst::IndirectBr {
3218
rn: rtmp1.to_reg(),
3219
targets: vec![],
3220
};
3221
inst.emit(sink, emit_info, state);
3222
// Emit jump table (table of 32-bit offsets).
3223
let jt_off = sink.cur_offset();
3224
for &target in targets.iter() {
3225
let word_off = sink.cur_offset();
3226
// off_into_table is an addend here embedded in the label to be later patched
3227
// at the end of codegen. The offset is initially relative to this jump table
3228
// entry; with the extra addend, it'll be relative to the jump table's start,
3229
// after patching.
3230
let off_into_table = word_off - jt_off;
3231
sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);
3232
sink.put4(off_into_table);
3233
}
3234
3235
// Lowering produces an EmitIsland before using a JTSequence, so we can safely
3236
// disable the worst-case-size check in this case.
3237
start_off = sink.cur_offset();
3238
}
3239
&Inst::LoadExtNameGot { rd, ref name } => {
3240
// See this CE Example for the variations of this with and without BTI & PAUTH
3241
// https://godbolt.org/z/ncqjbbvvn
3242
//
3243
// Emit the following code:
3244
// adrp rd, :got:X
3245
// ldr rd, [rd, :got_lo12:X]
3246
3247
// adrp rd, symbol
3248
sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0);
3249
let inst = Inst::Adrp { rd, off: 0 };
3250
inst.emit(sink, emit_info, state);
3251
3252
// ldr rd, [rd, :got_lo12:X]
3253
sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0);
3254
let inst = Inst::ULoad64 {
3255
rd,
3256
mem: AMode::reg(rd.to_reg()),
3257
flags: MemFlags::trusted(),
3258
};
3259
inst.emit(sink, emit_info, state);
3260
}
3261
&Inst::LoadExtNameNear {
3262
rd,
3263
ref name,
3264
offset,
3265
} => {
3266
// Emit the following code:
3267
// adrp rd, X
3268
// add rd, rd, :lo12:X
3269
//
3270
// See https://godbolt.org/z/855KEvM5r for an example.
3271
3272
// adrp rd, symbol
3273
sink.add_reloc(Reloc::Aarch64AdrPrelPgHi21, &**name, offset);
3274
let inst = Inst::Adrp { rd, off: 0 };
3275
inst.emit(sink, emit_info, state);
3276
3277
// add rd, rd, :lo12:X
3278
sink.add_reloc(Reloc::Aarch64AddAbsLo12Nc, &**name, offset);
3279
let inst = Inst::AluRRImm12 {
3280
alu_op: ALUOp::Add,
3281
size: OperandSize::Size64,
3282
rd,
3283
rn: rd.to_reg(),
3284
imm12: Imm12::ZERO,
3285
};
3286
inst.emit(sink, emit_info, state);
3287
}
3288
&Inst::LoadExtNameFar {
3289
rd,
3290
ref name,
3291
offset,
3292
} => {
3293
// With absolute offsets we set up a load from a preallocated space, and then jump
3294
// over it.
3295
//
3296
// Emit the following code:
3297
// ldr rd, #8
3298
// b #0x10
3299
// <8 byte space>
3300
3301
let inst = Inst::ULoad64 {
3302
rd,
3303
mem: AMode::Label {
3304
label: MemLabel::PCRel(8),
3305
},
3306
flags: MemFlags::trusted(),
3307
};
3308
inst.emit(sink, emit_info, state);
3309
let inst = Inst::Jump {
3310
dest: BranchTarget::ResolvedOffset(12),
3311
};
3312
inst.emit(sink, emit_info, state);
3313
sink.add_reloc(Reloc::Abs8, &**name, offset);
3314
sink.put8(0);
3315
}
3316
&Inst::LoadAddr { rd, ref mem } => {
3317
let mem = mem.clone();
3318
let (mem_insts, mem) = mem_finalize(Some(sink), &mem, I8, state);
3319
for inst in mem_insts.into_iter() {
3320
inst.emit(sink, emit_info, state);
3321
}
3322
3323
let (reg, index_reg, offset) = match mem {
3324
AMode::RegExtended { rn, rm, extendop } => {
3325
let r = rn;
3326
(r, Some((rm, extendop)), 0)
3327
}
3328
AMode::Unscaled { rn, simm9 } => {
3329
let r = rn;
3330
(r, None, simm9.value())
3331
}
3332
AMode::UnsignedOffset { rn, uimm12 } => {
3333
let r = rn;
3334
(r, None, uimm12.value() as i32)
3335
}
3336
_ => panic!("Unsupported case for LoadAddr: {mem:?}"),
3337
};
3338
let abs_offset = if offset < 0 {
3339
-offset as u64
3340
} else {
3341
offset as u64
3342
};
3343
let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };
3344
3345
if let Some((idx, extendop)) = index_reg {
3346
let add = Inst::AluRRRExtend {
3347
alu_op: ALUOp::Add,
3348
size: OperandSize::Size64,
3349
rd,
3350
rn: reg,
3351
rm: idx,
3352
extendop,
3353
};
3354
3355
add.emit(sink, emit_info, state);
3356
} else if offset == 0 {
3357
if reg != rd.to_reg() {
3358
let mov = Inst::Mov {
3359
size: OperandSize::Size64,
3360
rd,
3361
rm: reg,
3362
};
3363
3364
mov.emit(sink, emit_info, state);
3365
}
3366
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
3367
let add = Inst::AluRRImm12 {
3368
alu_op,
3369
size: OperandSize::Size64,
3370
rd,
3371
rn: reg,
3372
imm12,
3373
};
3374
add.emit(sink, emit_info, state);
3375
} else {
3376
// Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
3377
// was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
3378
// that no other instructions will be inserted here (we're emitting directly),
3379
// and a live range of `tmp2` should not span this instruction, so this use
3380
// should otherwise be correct.
3381
debug_assert!(rd.to_reg() != tmp2_reg());
3382
debug_assert!(reg != tmp2_reg());
3383
let tmp = writable_tmp2_reg();
3384
for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
3385
insn.emit(sink, emit_info, state);
3386
}
3387
let add = Inst::AluRRR {
3388
alu_op,
3389
size: OperandSize::Size64,
3390
rd,
3391
rn: reg,
3392
rm: tmp.to_reg(),
3393
};
3394
add.emit(sink, emit_info, state);
3395
}
3396
}
3397
&Inst::Paci { key } => {
3398
let (crm, op2) = match key {
3399
APIKey::AZ => (0b0011, 0b000),
3400
APIKey::ASP => (0b0011, 0b001),
3401
APIKey::BZ => (0b0011, 0b010),
3402
APIKey::BSP => (0b0011, 0b011),
3403
};
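// These are the PACIAZ/PACIASP/PACIBZ/PACIBSP instructions. They live in the
// HINT space, so they execute as NOPs on cores without pointer authentication.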
3404
3405
sink.put4(0xd503211f | (crm << 8) | (op2 << 5));
3406
}
3407
&Inst::Xpaclri => sink.put4(0xd50320ff),
3408
&Inst::Bti { targets } => {
3409
let targets = match targets {
3410
BranchTargetType::None => 0b00,
3411
BranchTargetType::C => 0b01,
3412
BranchTargetType::J => 0b10,
3413
BranchTargetType::JC => 0b11,
3414
};
3415
3416
sink.put4(0xd503241f | targets << 6);
3417
}
3418
&Inst::EmitIsland { needed_space } => {
3419
if sink.island_needed(needed_space + 4) {
3420
let jump_around_label = sink.get_label();
3421
let jmp = Inst::Jump {
3422
dest: BranchTarget::Label(jump_around_label),
3423
};
3424
jmp.emit(sink, emit_info, state);
3425
sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
3426
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
3427
}
3428
}
3429
3430
&Inst::ElfTlsGetAddr {
3431
ref symbol,
3432
rd,
3433
tmp,
3434
} => {
3435
assert_eq!(xreg(0), rd.to_reg());
3436
3437
// See the original proposal for TLSDESC.
3438
// http://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
3439
//
3440
// Implement the TLSDESC instruction sequence:
3441
// adrp x0, :tlsdesc:tlsvar
3442
// ldr tmp, [x0, :tlsdesc_lo12:tlsvar]
3443
// add x0, x0, :tlsdesc_lo12:tlsvar
3444
// blr tmp
3445
// mrs tmp, tpidr_el0
3446
// add x0, x0, tmp
3447
//
3448
// This is the instruction sequence that GCC emits for ELF general-dynamic (GD) TLS relocations on aarch64.
3449
// See: https://gcc.godbolt.org/z/e4j7MdErh
3450
3451
// adrp x0, :tlsdesc:tlsvar
3452
sink.add_reloc(Reloc::Aarch64TlsDescAdrPage21, &**symbol, 0);
3453
Inst::Adrp { rd, off: 0 }.emit(sink, emit_info, state);
3454
3455
// ldr tmp, [x0, :tlsdesc_lo12:tlsvar]
3456
sink.add_reloc(Reloc::Aarch64TlsDescLd64Lo12, &**symbol, 0);
3457
Inst::ULoad64 {
3458
rd: tmp,
3459
mem: AMode::reg(rd.to_reg()),
3460
flags: MemFlags::trusted(),
3461
}
3462
.emit(sink, emit_info, state);
3463
3464
// add x0, x0, :tlsdesc_lo12:tlsvar
3465
sink.add_reloc(Reloc::Aarch64TlsDescAddLo12, &**symbol, 0);
3466
Inst::AluRRImm12 {
3467
alu_op: ALUOp::Add,
3468
size: OperandSize::Size64,
3469
rd,
3470
rn: rd.to_reg(),
3471
imm12: Imm12::maybe_from_u64(0).unwrap(),
3472
}
3473
.emit(sink, emit_info, state);
3474
3475
// blr tmp
3476
sink.add_reloc(Reloc::Aarch64TlsDescCall, &**symbol, 0);
3477
Inst::CallInd {
3478
info: crate::isa::Box::new(CallInfo::empty(tmp.to_reg(), CallConv::SystemV)),
3479
}
3480
.emit(sink, emit_info, state);
3481
3482
// mrs tmp, tpidr_el0
3483
sink.put4(0xd53bd040 | machreg_to_gpr(tmp.to_reg()));
3484
3485
// add x0, x0, tmp
3486
Inst::AluRRR {
3487
alu_op: ALUOp::Add,
3488
size: OperandSize::Size64,
3489
rd,
3490
rn: rd.to_reg(),
3491
rm: tmp.to_reg(),
3492
}
3493
.emit(sink, emit_info, state);
3494
}
3495
3496
&Inst::MachOTlsGetAddr { ref symbol, rd } => {
3497
// Each thread local variable gets a descriptor, where the first xword of the descriptor is a pointer
3498
// to a function that takes the descriptor address in x0; after the function returns, x0
3499
// contains the address of the thread-local variable.
3500
//
3501
// what we want to emit is basically:
3502
//
3503
// adrp x0, <label>@TLVPPAGE ; Load the address of the page of the thread local variable pointer (TLVP)
3504
// ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0
3505
// ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)
3506
// blr x1 ; Call the function pointer with the descriptor address in x0
3507
// ; x0 now contains the TLV address
3508
3509
assert_eq!(xreg(0), rd.to_reg());
3510
let rtmp = writable_xreg(1);
3511
3512
// adrp x0, <label>@TLVPPAGE
3513
sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);
3514
sink.put4(0x90000000);
3515
3516
// ldr x0, [x0, <label>@TLVPPAGEOFF]
3517
sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);
3518
sink.put4(0xf9400000);
3519
3520
// load [x0] into temp register
3521
Inst::ULoad64 {
3522
rd: rtmp,
3523
mem: AMode::reg(rd.to_reg()),
3524
flags: MemFlags::trusted(),
3525
}
3526
.emit(sink, emit_info, state);
3527
3528
// call function pointer in temp register
3529
Inst::CallInd {
3530
info: crate::isa::Box::new(CallInfo::empty(
3531
rtmp.to_reg(),
3532
CallConv::AppleAarch64,
3533
)),
3534
}
3535
.emit(sink, emit_info, state);
3536
}
3537
3538
&Inst::Unwind { ref inst } => {
3539
sink.add_unwind(inst.clone());
3540
}
3541
3542
&Inst::DummyUse { .. } => {}
3543
3544
&Inst::LabelAddress { dst, label } => {
3545
// We emit an ADR only, which has a +/- 1MiB range. This
3546
// should be sufficient for the typical use-case of
3547
// this instruction, which is in small trampolines to
3548
// get exception-handler addresses.
3549
let inst = Inst::Adr { rd: dst, off: 0 };
3550
let offset = sink.cur_offset();
3551
inst.emit(sink, emit_info, state);
3552
sink.use_label_at_offset(offset, label, LabelUse::Adr21);
3553
}
3554
3555
&Inst::SequencePoint { .. } => {
3556
// Nothing.
3557
}
3558
3559
&Inst::StackProbeLoop { start, end, step } => {
3560
assert!(emit_info.0.enable_probestack());
3561
3562
// The loop generated here uses `start` as a counter register to
3563
// count backwards until negating it exceeds `end`. In other
3564
// words, `start` is an offset from `sp` that we're testing, and
3565
// `end` is the max size we need to test. The loop looks like:
3566
//
3567
// loop_start:
3568
// sub start, start, #step
3569
// stur xzr, [sp, start]
3570
// cmn start, end
3571
// br.gt loop_start
3572
// loop_end:
3573
//
3574
// Note that this loop cannot use the spilltmp and tmp2
3575
// registers as those are currently used as the input to this
3576
// loop when generating the instruction. This means that some
3577
// more flavorful address modes and lowerings need to be
3578
// avoided.
3579
//
3580
// Perhaps someone more clever than I can figure out how to use
3581
// `subs` or the like and skip the `cmn`, but I can't figure it
3582
// out at this time.
3583
3584
let loop_start = sink.get_label();
3585
sink.bind_label(loop_start, &mut state.ctrl_plane);
3586
3587
Inst::AluRRImm12 {
3588
alu_op: ALUOp::Sub,
3589
size: OperandSize::Size64,
3590
rd: start,
3591
rn: start.to_reg(),
3592
imm12: step,
3593
}
3594
.emit(sink, emit_info, state);
3595
Inst::Store32 {
3596
rd: regs::zero_reg(),
3597
mem: AMode::RegReg {
3598
rn: regs::stack_reg(),
3599
rm: start.to_reg(),
3600
},
3601
flags: MemFlags::trusted(),
3602
}
3603
.emit(sink, emit_info, state);
3604
Inst::AluRRR {
3605
alu_op: ALUOp::AddS,
3606
size: OperandSize::Size64,
3607
rd: regs::writable_zero_reg(),
3608
rn: start.to_reg(),
3609
rm: end,
3610
}
3611
.emit(sink, emit_info, state);
3612
3613
let loop_end = sink.get_label();
3614
Inst::CondBr {
3615
taken: BranchTarget::Label(loop_start),
3616
not_taken: BranchTarget::Label(loop_end),
3617
kind: CondBrKind::Cond(Cond::Gt),
3618
}
3619
.emit(sink, emit_info, state);
3620
sink.bind_label(loop_end, &mut state.ctrl_plane);
3621
}
3622
}
3623
3624
let end_off = sink.cur_offset();
3625
debug_assert!(
3626
(end_off - start_off) <= Inst::worst_case_size()
3627
|| matches!(self, Inst::EmitIsland { .. }),
3628
"Worst case size exceed for {:?}: {}",
3629
self,
3630
end_off - start_off
3631
);
3632
3633
state.clear_post_insn();
3634
}
3635
3636
fn pretty_print_inst(&self, state: &mut Self::State) -> String {
3637
self.print_with_state(state)
3638
}
3639
}
3640
3641
fn emit_return_call_common_sequence<T>(
3642
sink: &mut MachBuffer<Inst>,
3643
emit_info: &EmitInfo,
3644
state: &mut EmitState,
3645
info: &ReturnCallInfo<T>,
3646
) {
3647
for inst in
3648
AArch64MachineDeps::gen_clobber_restore(CallConv::Tail, &emit_info.0, state.frame_layout())
3649
{
3650
inst.emit(sink, emit_info, state);
3651
}
3652
3653
let setup_area_size = state.frame_layout().setup_area_size;
3654
if setup_area_size > 0 {
3655
// N.B.: sp is already adjusted to the appropriate place by the
3656
// clobber-restore code (which also frees the fixed frame). Hence, there
3657
// is no need for the usual `mov sp, fp` here.
3658
3659
// `ldp fp, lr, [sp], #16`
3660
Inst::LoadP64 {
3661
rt: writable_fp_reg(),
3662
rt2: writable_link_reg(),
3663
mem: PairAMode::SPPostIndexed {
3664
// TODO: we could fold the increment for incoming_args_diff here, as long as that
3665
// value is less than 502*8, by adding it to `setup_area_size`.
3666
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
3667
simm7: SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64).unwrap(),
3668
},
3669
flags: MemFlags::trusted(),
3670
}
3671
.emit(sink, emit_info, state);
3672
}
3673
3674
// Adjust SP to account for the possible over-allocation in the prologue.
3675
let incoming_args_diff = state.frame_layout().tail_args_size - info.new_stack_arg_size;
3676
if incoming_args_diff > 0 {
3677
for inst in
3678
AArch64MachineDeps::gen_sp_reg_adjust(i32::try_from(incoming_args_diff).unwrap())
3679
{
3680
inst.emit(sink, emit_info, state);
3681
}
3682
}
3683
3684
if let Some(key) = info.key {
3685
sink.put4(key.enc_auti_hint());
3686
}
3687
}