1
//! Implementation of a standard AArch64 ABI.
2
3
use crate::CodegenResult;
4
use crate::ir;
5
use crate::ir::MemFlags;
6
use crate::ir::types;
7
use crate::ir::types::*;
8
use crate::ir::{ExternalName, LibCall, Signature, dynamic_to_fixed};
9
use crate::isa;
10
use crate::isa::aarch64::{inst::*, settings as aarch64_settings};
11
use crate::isa::unwind::UnwindInst;
12
use crate::isa::winch;
13
use crate::machinst::*;
14
use crate::settings;
15
use alloc::boxed::Box;
16
use alloc::vec::Vec;
17
use regalloc2::{MachineEnv, PReg, PRegSet};
18
use smallvec::{SmallVec, smallvec};
19
use std::borrow::ToOwned;
20
use std::sync::OnceLock;
21
22
// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
23
// these ABIs are very similar.
24
25
/// Support for the AArch64 ABI from the callee side (within a function body).
26
pub(crate) type AArch64Callee = Callee<AArch64MachineDeps>;
27
28
impl From<StackAMode> for AMode {
29
fn from(stack: StackAMode) -> AMode {
30
match stack {
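// Note on the IncomingArg conversion below (illustrative numbers): an
// argument at offset 8 within a 32-byte incoming-argument area becomes
// `AMode::IncomingArg { off: 32 - 8 }`, i.e. the offset is re-expressed
// relative to the top of the incoming-argument area rather than its base.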
31
StackAMode::IncomingArg(off, stack_args_size) => AMode::IncomingArg {
32
off: i64::from(stack_args_size) - off,
33
},
34
StackAMode::Slot(off) => AMode::SlotOffset { off },
35
StackAMode::OutgoingArg(off) => AMode::SPOffset { off },
36
}
37
}
38
}
39
40
// Returns the size of stack space needed to store the
// `clobbered_callee_saves` registers.
42
fn compute_clobber_size(clobbered_callee_saves: &[Writable<RealReg>]) -> u32 {
43
let mut int_regs = 0;
44
let mut vec_regs = 0;
45
for &reg in clobbered_callee_saves {
46
match reg.to_reg().class() {
47
RegClass::Int => {
48
int_regs += 1;
49
}
50
RegClass::Float => {
51
vec_regs += 1;
52
}
53
RegClass::Vector => unreachable!(),
54
}
55
}
56
57
// Round up to multiple of 2, to keep 16-byte stack alignment.
58
let int_save_bytes = (int_regs + (int_regs & 1)) * 8;
59
// The Procedure Call Standard for the Arm 64-bit Architecture
60
// (AAPCS64, including several related ABIs such as the one used by
61
// Windows) mandates saving only the bottom 8 bytes of the vector
62
// registers, so we round up the number of registers to ensure
63
// proper stack alignment (similarly to the situation with
// `int_regs`).
65
let vec_reg_size = 8;
66
let vec_save_padding = vec_regs & 1;
67
// FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs?
68
let vec_save_bytes = (vec_regs + vec_save_padding) * vec_reg_size;
69
70
int_save_bytes + vec_save_bytes
71
}
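// Worked example (illustrative register counts): with three clobbered
// integer callee-saves and one clobbered vector callee-save,
// int_save_bytes = (3 + 1) * 8 = 32 and vec_save_bytes = (1 + 1) * 8 = 16,
// so compute_clobber_size returns 48, a multiple of 16 as required for
// stack alignment.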
72
73
/// AArch64-specific ABI behavior. This struct just serves as an implementation
74
/// point for the trait; it is never actually instantiated.
75
pub struct AArch64MachineDeps;
76
77
impl IsaFlags for aarch64_settings::Flags {
78
fn is_forward_edge_cfi_enabled(&self) -> bool {
79
self.use_bti()
80
}
81
}
82
83
impl ABIMachineSpec for AArch64MachineDeps {
84
type I = Inst;
85
86
type F = aarch64_settings::Flags;
87
88
/// This is the limit for the size of argument and return-value areas on the
89
/// stack. We place a reasonable limit here to avoid integer overflow issues
90
/// with 32-bit arithmetic: for now, 128 MB.
91
const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
92
93
fn word_bits() -> u32 {
94
64
95
}
96
97
/// Return required stack alignment in bytes.
98
fn stack_align(_call_conv: isa::CallConv) -> u32 {
99
16
100
}
101
102
fn compute_arg_locs(
103
call_conv: isa::CallConv,
104
flags: &settings::Flags,
105
params: &[ir::AbiParam],
106
args_or_rets: ArgsOrRets,
107
add_ret_area_ptr: bool,
108
mut args: ArgsAccumulator,
109
) -> CodegenResult<(u32, Option<usize>)> {
110
let is_apple_cc = call_conv == isa::CallConv::AppleAarch64;
111
let is_winch_return = call_conv == isa::CallConv::Winch && args_or_rets == ArgsOrRets::Rets;
112
113
// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), section 6.4.
//
// macOS AArch64 is slightly different; see also
// https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
// We diverge from the macOS AArch64 implementation in the
// following ways:
// - sign- and zero-extensions of data types smaller than 32 bits are
//   not implemented yet.
// - we align the argument stack space to a 16-byte boundary, while
//   macOS allows aligning to only 8 bytes. In practice this means we
//   slightly overallocate when calling, which is fine and doesn't
//   break our invariant that the stack is always allocated in
//   16-byte chunks.
126
127
let mut next_xreg = if call_conv == isa::CallConv::Tail {
128
// We reserve `x0` for the return area pointer. For simplicity, we
129
// reserve it even when there is no return area pointer needed. This
130
// also means that identity functions don't have to shuffle arguments to
131
// different return registers because we shifted all argument register
132
// numbers down by one to make space for the return area pointer.
133
//
134
// Also, we cannot use all allocatable GPRs as arguments because we need
135
// at least one allocatable register for holding the callee address in
136
// indirect calls. So skip `x1` also, reserving it for that role.
137
2
138
} else {
139
0
140
};
141
let mut next_vreg = 0;
142
let mut next_stack: u32 = 0;
143
144
// Note on return values: under the regular ABI we may return values
// in up to 8 integer registers and up to 8 vector registers at once,
// independently of how many values are returned in the other class.
149
let max_per_class_reg_vals = 8; // x0-x7 and v0-v7
150
let mut remaining_reg_vals = 16;
151
152
let ret_area_ptr = if add_ret_area_ptr {
153
debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
154
if call_conv != isa::CallConv::Winch {
155
// In the AAPCS64 calling convention the return area pointer is
156
// stored in x8.
157
Some(ABIArg::reg(
158
xreg(8).to_real_reg().unwrap(),
159
I64,
160
ir::ArgumentExtension::None,
161
ir::ArgumentPurpose::Normal,
162
))
163
} else {
164
// Use x0 for the return area pointer in the Winch calling convention
165
// to simplify the ABI handling code in Winch by avoiding an AArch64
166
// special case to assign it to x8.
167
next_xreg += 1;
168
Some(ABIArg::reg(
169
xreg(0).to_real_reg().unwrap(),
170
I64,
171
ir::ArgumentExtension::None,
172
ir::ArgumentPurpose::Normal,
173
))
174
}
175
} else {
176
None
177
};
178
179
for (i, param) in params.into_iter().enumerate() {
180
if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions()
181
{
182
panic!(
183
"f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled"
184
);
185
}
186
187
let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;
188
189
if let ir::ArgumentPurpose::StructReturn = param.purpose {
190
assert!(
191
call_conv != isa::CallConv::Tail,
192
"support for StructReturn parameters is not implemented for the `tail` \
193
calling convention yet",
194
);
195
}
196
197
if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
198
panic!(
199
"StructArgument parameters are not supported on arm64. \
200
Use regular pointer arguments instead."
201
);
202
}
203
204
if let ir::ArgumentPurpose::StructReturn = param.purpose {
205
// FIXME add assert_eq!(args_or_rets, ArgsOrRets::Args); once
206
// ensure_struct_return_ptr_is_returned is gone.
207
assert!(
208
param.value_type == types::I64,
209
"StructReturn must be a pointer sized integer"
210
);
211
args.push(ABIArg::Slots {
212
slots: smallvec![ABIArgSlot::Reg {
213
reg: xreg(8).to_real_reg().unwrap(),
214
ty: types::I64,
215
extension: param.extension,
216
},],
217
purpose: ir::ArgumentPurpose::StructReturn,
218
});
219
continue;
220
}
221
222
// Handle multi-register params.
//
// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), Section 6.4.2 Stage C.
//
// For arguments with an alignment of 16 we round the register number
// up to the next even value (Stage C.8). So we can never allocate, for
// example, an i128 to X1 and X2; we have to skip one register and use
// X2 and X3 instead.
// Note: the Apple ABI deviates a bit here. It doesn't respect Stage C.8
// and will happily allocate an i128 to X1 and X2.
//
// For integer types with an alignment of 16 we also have the additional
// restriction of passing the lower half in Xn and the upper half in Xn+1
// (Stage C.9).
//
// For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
//
// On the Apple ABI it is unspecified whether we can split the value between
// registers and the stack, i.e. load the lower half into x7 and the upper
// half onto the stack. LLVM does not seem to do this, so we replicate that
// behaviour.
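// Worked example (hypothetical state): for an i128 parameter when
// `next_xreg` is 1 on a non-Apple ABI, Stage C.8 forces a bump to x2, the
// lower half goes in x2 and the upper half in x3, and `next_xreg` ends up
// at 4. Under the Apple ABI no bump happens, so the halves would go in x1
// and x2 instead.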
242
let is_multi_reg = rcs.len() >= 2;
243
if is_multi_reg {
244
assert!(
245
rcs.len() == 2,
246
"Unable to handle multi reg params with more than 2 regs"
247
);
248
assert!(
249
rcs == &[RegClass::Int, RegClass::Int],
250
"Unable to handle non i64 regs"
251
);
252
253
let reg_class_space = max_per_class_reg_vals - next_xreg;
254
let reg_space = remaining_reg_vals;
255
256
if reg_space >= 2 && reg_class_space >= 2 {
257
// The aarch64 ABI does not allow us to start a split argument
// at an odd-numbered register, so we need to skip one register.
259
//
260
// TODO: The Fast ABI should probably not skip the register
261
if !is_apple_cc && next_xreg % 2 != 0 {
262
next_xreg += 1;
263
}
264
265
let lower_reg = xreg(next_xreg);
266
let upper_reg = xreg(next_xreg + 1);
267
268
args.push(ABIArg::Slots {
269
slots: smallvec![
270
ABIArgSlot::Reg {
271
reg: lower_reg.to_real_reg().unwrap(),
272
ty: reg_types[0],
273
extension: param.extension,
274
},
275
ABIArgSlot::Reg {
276
reg: upper_reg.to_real_reg().unwrap(),
277
ty: reg_types[1],
278
extension: param.extension,
279
},
280
],
281
purpose: param.purpose,
282
});
283
284
next_xreg += 2;
285
remaining_reg_vals -= 2;
286
continue;
287
}
288
} else {
289
// Single Register parameters
290
let rc = rcs[0];
291
let next_reg = match rc {
292
RegClass::Int => &mut next_xreg,
293
RegClass::Float => &mut next_vreg,
294
RegClass::Vector => unreachable!(),
295
};
296
297
let push_to_reg = if is_winch_return {
298
// Winch uses the first register to return the last result
299
i == params.len() - 1
300
} else {
301
// Use max_per_class_reg_vals & remaining_reg_vals otherwise
302
*next_reg < max_per_class_reg_vals && remaining_reg_vals > 0
303
};
304
305
if push_to_reg {
306
let reg = match rc {
307
RegClass::Int => xreg(*next_reg),
308
RegClass::Float => vreg(*next_reg),
309
RegClass::Vector => unreachable!(),
310
};
311
// Overlay Z-regs on V-regs for parameter passing.
312
let ty = if param.value_type.is_dynamic_vector() {
313
dynamic_to_fixed(param.value_type)
314
} else {
315
param.value_type
316
};
317
args.push(ABIArg::reg(
318
reg.to_real_reg().unwrap(),
319
ty,
320
param.extension,
321
param.purpose,
322
));
323
*next_reg += 1;
324
remaining_reg_vals -= 1;
325
continue;
326
}
327
}
328
329
// Spill to the stack
330
331
if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
332
return Err(crate::CodegenError::Unsupported(
333
"Too many return values to fit in registers. \
334
Use a StructReturn argument instead. (#9510)"
335
.to_owned(),
336
));
337
}
338
339
// Compute the stack slot's size.
340
let size = (ty_bits(param.value_type) / 8) as u32;
341
342
let size = if is_apple_cc || is_winch_return {
343
// The macOS and Winch aarch64 conventions allow stack slots with
// sizes smaller than 8 bytes. They still need to be properly
// aligned on their natural data alignment, though.
347
size
348
} else {
349
// Every arg takes a minimum slot of 8 bytes. (16-byte stack
350
// alignment happens separately after all args.)
351
std::cmp::max(size, 8)
352
};
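// For illustration: an I8 argument has size 1 here; on the default ABI it
// is widened to an 8-byte slot, while on the Apple and Winch conventions
// it keeps its 1-byte size and is aligned only to its natural alignment
// below.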
353
354
if !is_winch_return {
355
// Align the stack slot.
356
debug_assert!(size.is_power_of_two());
357
next_stack = align_to(next_stack, size);
358
}
359
360
let slots = reg_types
361
.iter()
362
.copied()
363
// Build the stack locations from each slot
364
.scan(next_stack, |next_stack, ty| {
365
let slot_offset = *next_stack as i64;
366
*next_stack += (ty_bits(ty) / 8) as u32;
367
368
Some((ty, slot_offset))
369
})
370
.map(|(ty, offset)| ABIArgSlot::Stack {
371
offset,
372
ty,
373
extension: param.extension,
374
})
375
.collect();
376
377
args.push(ABIArg::Slots {
378
slots,
379
purpose: param.purpose,
380
});
381
382
next_stack += size;
383
}
384
385
let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr {
386
args.push_non_formal(ret_area_ptr);
387
Some(args.args().len() - 1)
388
} else {
389
None
390
};
391
392
if is_winch_return {
393
winch::reverse_stack(args, next_stack, false);
394
}
395
396
next_stack = align_to(next_stack, 16);
397
398
Ok((next_stack, extra_arg))
399
}
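// A small worked example of the register assignment above (hypothetical
// signature, default AAPCS64 convention): for parameters (i64, f64, i128),
// the i64 goes in x0, the f64 in v0, and the i128 in the pair x2/x3 (x1 is
// skipped by the Stage C.8 alignment rule), with no stack space needed.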
400
401
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
402
Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
403
}
404
405
fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
406
Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
407
}
408
409
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
410
Inst::gen_move(to_reg, from_reg, ty)
411
}
412
413
fn gen_extend(
414
to_reg: Writable<Reg>,
415
from_reg: Reg,
416
signed: bool,
417
from_bits: u8,
418
to_bits: u8,
419
) -> Inst {
420
assert!(from_bits < to_bits);
421
Inst::Extend {
422
rd: to_reg,
423
rn: from_reg,
424
signed,
425
from_bits,
426
to_bits,
427
}
428
}
429
430
fn gen_args(args: Vec<ArgPair>) -> Inst {
431
Inst::Args { args }
432
}
433
434
fn gen_rets(rets: Vec<RetPair>) -> Inst {
435
Inst::Rets { rets }
436
}
437
438
fn gen_add_imm(
439
_call_conv: isa::CallConv,
440
into_reg: Writable<Reg>,
441
from_reg: Reg,
442
imm: u32,
443
) -> SmallInstVec<Inst> {
444
let imm = imm as u64;
445
let mut insts = SmallVec::new();
446
if let Some(imm12) = Imm12::maybe_from_u64(imm) {
447
insts.push(Inst::AluRRImm12 {
448
alu_op: ALUOp::Add,
449
size: OperandSize::Size64,
450
rd: into_reg,
451
rn: from_reg,
452
imm12,
453
});
454
} else {
455
let scratch2 = writable_tmp2_reg();
456
assert_ne!(scratch2.to_reg(), from_reg);
457
// `gen_add_imm` is only ever called after register allocation has taken place, and as a
// result it's ok to reuse the scratch2 register here. If that changes, we'll need to
// plumb through a way to allocate temporary virtual registers.
460
insts.extend(Inst::load_constant(scratch2, imm));
461
insts.push(Inst::AluRRRExtend {
462
alu_op: ALUOp::Add,
463
size: OperandSize::Size64,
464
rd: into_reg,
465
rn: from_reg,
466
rm: scratch2.to_reg(),
467
extendop: ExtendOp::UXTX,
468
});
469
}
470
insts
471
}
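// For illustration: `imm = 4096` is encodable as an Imm12 (1 shifted left
// by 12), so it takes the single-`add` path above, whereas `imm = 4097` is
// not encodable and goes through the `load_constant` + `add` path using
// the scratch register.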
472
473
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
474
let mut insts = SmallVec::new();
475
insts.push(Inst::AluRRRExtend {
476
alu_op: ALUOp::SubS,
477
size: OperandSize::Size64,
478
rd: writable_zero_reg(),
479
rn: stack_reg(),
480
rm: limit_reg,
481
extendop: ExtendOp::UXTX,
482
});
483
insts.push(Inst::TrapIf {
484
trap_code: ir::TrapCode::STACK_OVERFLOW,
485
// Here `Lo` == "less than" when interpreting the two
486
// operands as unsigned integers.
487
kind: CondBrKind::Cond(Cond::Lo),
488
});
489
insts
490
}
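// Roughly speaking, the sequence above materializes as a
// `subs xzr, sp, <limit_reg>` (i.e. a compare of SP against the limit)
// followed by a conditional trap that fires when SP is below the limit,
// raising `stack_overflow`.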
491
492
fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
493
// FIXME: Do something different for dynamic types?
494
let mem = mem.into();
495
Inst::LoadAddr { rd: into_reg, mem }
496
}
497
498
fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
499
spilltmp_reg()
500
}
501
502
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
503
let mem = AMode::RegOffset {
504
rn: base,
505
off: offset as i64,
506
};
507
Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
508
}
509
510
fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
511
let mem = AMode::RegOffset {
512
rn: base,
513
off: offset as i64,
514
};
515
Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
516
}
517
518
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
519
if amount == 0 {
520
return SmallVec::new();
521
}
522
523
let (amount, is_sub) = if amount > 0 {
524
(amount as u64, false)
525
} else {
526
(-amount as u64, true)
527
};
528
529
let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add };
530
531
let mut ret = SmallVec::new();
532
if let Some(imm12) = Imm12::maybe_from_u64(amount) {
533
let adj_inst = Inst::AluRRImm12 {
534
alu_op,
535
size: OperandSize::Size64,
536
rd: writable_stack_reg(),
537
rn: stack_reg(),
538
imm12,
539
};
540
ret.push(adj_inst);
541
} else {
542
let tmp = writable_spilltmp_reg();
543
// `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for
544
// intermediates in `load_constant`.
545
let const_inst = Inst::load_constant(tmp, amount);
546
let adj_inst = Inst::AluRRRExtend {
547
alu_op,
548
size: OperandSize::Size64,
549
rd: writable_stack_reg(),
550
rn: stack_reg(),
551
rm: tmp.to_reg(),
552
extendop: ExtendOp::UXTX,
553
};
554
ret.extend(const_inst);
555
ret.push(adj_inst);
556
}
557
ret
558
}
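// For illustration: `gen_sp_reg_adjust(-32)` emits a single
// `sub sp, sp, #32`, while an adjustment too large for an Imm12 (for
// example -1_000_000) loads the constant into the spill temporary and
// emits an extended-register `sub`.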
559
560
fn gen_prologue_frame_setup(
561
call_conv: isa::CallConv,
562
flags: &settings::Flags,
563
isa_flags: &aarch64_settings::Flags,
564
frame_layout: &FrameLayout,
565
) -> SmallInstVec<Inst> {
566
let setup_frame = frame_layout.setup_area_size > 0;
567
let mut insts = SmallVec::new();
568
569
match Self::select_api_key(isa_flags, call_conv, setup_frame) {
570
Some(key) => {
571
insts.push(Inst::Paci { key });
572
if flags.unwind_info() {
573
insts.push(Inst::Unwind {
574
inst: UnwindInst::Aarch64SetPointerAuth {
575
return_addresses: true,
576
},
577
});
578
}
579
}
580
None => {
581
if isa_flags.use_bti() {
582
insts.push(Inst::Bti {
583
targets: BranchTargetType::C,
584
});
585
}
586
587
if flags.unwind_info() && call_conv == isa::CallConv::AppleAarch64 {
588
// The macOS unwinder seems to require this.
589
insts.push(Inst::Unwind {
590
inst: UnwindInst::Aarch64SetPointerAuth {
591
return_addresses: false,
592
},
593
});
594
}
595
}
596
}
597
598
if setup_frame {
599
// stp fp (x29), lr (x30), [sp, #-16]!
600
insts.push(Inst::StoreP64 {
601
rt: fp_reg(),
602
rt2: link_reg(),
603
mem: PairAMode::SPPreIndexed {
604
simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
605
},
606
flags: MemFlags::trusted(),
607
});
608
609
if flags.unwind_info() {
610
insts.push(Inst::Unwind {
611
inst: UnwindInst::PushFrameRegs {
612
offset_upward_to_caller_sp: frame_layout.setup_area_size,
613
},
614
});
615
}
616
617
// mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV` because
// the usual encoding (`ORR`) does not work with SP.
619
insts.push(Inst::AluRRImm12 {
620
alu_op: ALUOp::Add,
621
size: OperandSize::Size64,
622
rd: writable_fp_reg(),
623
rn: stack_reg(),
624
imm12: Imm12 {
625
bits: 0,
626
shift12: false,
627
},
628
});
629
}
630
631
insts
632
}
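// In the common case (frame setup needed, no pointer authentication and no
// BTI), the code above boils down to:
//
//   stp x29, x30, [sp, #-16]!
//   mov x29, sp
//
// plus the unwind pseudo-instructions when unwind info is requested.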
633
634
fn gen_epilogue_frame_restore(
635
call_conv: isa::CallConv,
636
_flags: &settings::Flags,
637
_isa_flags: &aarch64_settings::Flags,
638
frame_layout: &FrameLayout,
639
) -> SmallInstVec<Inst> {
640
let setup_frame = frame_layout.setup_area_size > 0;
641
let mut insts = SmallVec::new();
642
643
if setup_frame {
644
// N.B.: sp is already adjusted to the appropriate place by the
645
// clobber-restore code (which also frees the fixed frame). Hence, there
646
// is no need for the usual `mov sp, fp` here.
647
648
// `ldp fp, lr, [sp], #16`
649
insts.push(Inst::LoadP64 {
650
rt: writable_fp_reg(),
651
rt2: writable_link_reg(),
652
mem: PairAMode::SPPostIndexed {
653
simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
654
},
655
flags: MemFlags::trusted(),
656
});
657
}
658
659
if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
660
insts.extend(Self::gen_sp_reg_adjust(
661
frame_layout.tail_args_size.try_into().unwrap(),
662
));
663
}
664
665
insts
666
}
667
668
fn gen_return(
669
call_conv: isa::CallConv,
670
isa_flags: &aarch64_settings::Flags,
671
frame_layout: &FrameLayout,
672
) -> SmallInstVec<Inst> {
673
let setup_frame = frame_layout.setup_area_size > 0;
674
675
match Self::select_api_key(isa_flags, call_conv, setup_frame) {
676
Some(key) => {
677
smallvec![Inst::AuthenticatedRet {
678
key,
679
is_hint: !isa_flags.has_pauth(),
680
}]
681
}
682
None => {
683
smallvec![Inst::Ret {}]
684
}
685
}
686
}
687
688
fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32) {
689
// TODO: implement if we ever require stack probes on an AArch64 host
690
// (unlikely unless Lucet is ported)
691
unimplemented!("Stack probing is unimplemented on AArch64");
692
}
693
694
fn gen_inline_probestack(
695
insts: &mut SmallInstVec<Self::I>,
696
_call_conv: isa::CallConv,
697
frame_size: u32,
698
guard_size: u32,
699
) {
700
// The stack probe loop currently takes 6 instructions and each inline
701
// probe takes 2 (ish, these numbers sort of depend on the constants).
702
// Set this to 3 to keep the max size of the probe to 6 instructions.
703
const PROBE_MAX_UNROLL: u32 = 3;
704
705
// Calculate how many probes we need to perform. Round down, as we only
706
// need to probe whole guard_size regions we'd otherwise skip over.
707
let probe_count = frame_size / guard_size;
708
if probe_count == 0 {
709
// No probe necessary
710
} else if probe_count <= PROBE_MAX_UNROLL {
711
Self::gen_probestack_unroll(insts, guard_size, probe_count)
712
} else {
713
Self::gen_probestack_loop(insts, frame_size, guard_size)
714
}
715
}
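// For illustration, with a 64 KiB guard region: a 150 KiB frame gives
// `probe_count = 2` and uses the unrolled form, while a 1 MiB frame gives
// `probe_count = 16` and falls back to the probe loop.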
716
717
fn gen_clobber_save(
718
_call_conv: isa::CallConv,
719
flags: &settings::Flags,
720
frame_layout: &FrameLayout,
721
) -> SmallVec<[Inst; 16]> {
722
let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();
723
724
let mut insts = SmallVec::new();
725
let setup_frame = frame_layout.setup_area_size > 0;
726
727
// When a return_call within this function requires more stack-argument space than the
// frame currently provides, resize the incoming argument area to accommodate those
// arguments.
729
let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
730
if incoming_args_diff > 0 {
731
// Decrement SP to account for the additional space required by a tail call.
732
insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
733
if flags.unwind_info() {
734
insts.push(Inst::Unwind {
735
inst: UnwindInst::StackAlloc {
736
size: incoming_args_diff,
737
},
738
});
739
}
740
741
// Move fp and lr down.
742
if setup_frame {
743
// Reload the frame pointer from the stack.
744
insts.push(Inst::ULoad64 {
745
rd: regs::writable_fp_reg(),
746
mem: AMode::SPOffset {
747
off: i64::from(incoming_args_diff),
748
},
749
flags: MemFlags::trusted(),
750
});
751
752
// Store the frame pointer and link register again at the new SP
753
insts.push(Inst::StoreP64 {
754
rt: fp_reg(),
755
rt2: link_reg(),
756
mem: PairAMode::SignedOffset {
757
reg: regs::stack_reg(),
758
simm7: SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(),
759
},
760
flags: MemFlags::trusted(),
761
});
762
763
// Keep the frame pointer in sync
764
insts.push(Self::gen_move(
765
regs::writable_fp_reg(),
766
regs::stack_reg(),
767
types::I64,
768
));
769
}
770
}
771
772
if flags.unwind_info() && setup_frame {
773
// The *unwind* frame (but not the actual frame) starts at the
774
// clobbers, just below the saved FP/LR pair.
775
insts.push(Inst::Unwind {
776
inst: UnwindInst::DefineNewFrame {
777
offset_downward_to_clobbers: frame_layout.clobber_size,
778
offset_upward_to_caller_sp: frame_layout.setup_area_size,
779
},
780
});
781
}
782
783
// We use pre-indexed addressing modes here, rather than the possibly
// more efficient "subtract sp once then use fixed offsets" scheme,
// because (i) we cannot necessarily guarantee that the offset of a
// clobber-save slot will be within the SImm7Scaled (+504-byte) offset
// range of the whole frame including other slots, (ii) it is more
// complex to conditionally generate a two-stage SP adjustment (clobbers
// then fixed frame) otherwise, and (iii) generally we just want to
// maintain simplicity here for maintainability. Because clobbers are at
// the top of the frame, just below FP, all that is necessary is to use
// the pre-indexed "push" `[sp, #-16]!` addressing mode.
//
// `clobber_offset` tracks the offset above the start of the clobber
// area for unwind-info purposes.
796
let mut clobber_offset = frame_layout.clobber_size;
797
let clobber_offset_change = 16;
798
let iter = clobbered_int.chunks_exact(2);
799
800
if let [rd] = iter.remainder() {
801
let rd: Reg = rd.to_reg().into();
802
803
debug_assert_eq!(rd.class(), RegClass::Int);
804
// str rd, [sp, #-16]!
805
insts.push(Inst::Store64 {
806
rd,
807
mem: AMode::SPPreIndexed {
808
simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
809
},
810
flags: MemFlags::trusted(),
811
});
812
813
if flags.unwind_info() {
814
clobber_offset -= clobber_offset_change as u32;
815
insts.push(Inst::Unwind {
816
inst: UnwindInst::SaveReg {
817
clobber_offset,
818
reg: rd.to_real_reg().unwrap(),
819
},
820
});
821
}
822
}
823
824
let mut iter = iter.rev();
825
826
while let Some([rt, rt2]) = iter.next() {
827
// .to_reg().into(): Writable<RealReg> --> RealReg --> Reg
828
let rt: Reg = rt.to_reg().into();
829
let rt2: Reg = rt2.to_reg().into();
830
831
debug_assert!(rt.class() == RegClass::Int);
832
debug_assert!(rt2.class() == RegClass::Int);
833
834
// stp rt, rt2, [sp, #-16]!
835
insts.push(Inst::StoreP64 {
836
rt,
837
rt2,
838
mem: PairAMode::SPPreIndexed {
839
simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
840
},
841
flags: MemFlags::trusted(),
842
});
843
844
if flags.unwind_info() {
845
clobber_offset -= clobber_offset_change as u32;
846
insts.push(Inst::Unwind {
847
inst: UnwindInst::SaveReg {
848
clobber_offset,
849
reg: rt.to_real_reg().unwrap(),
850
},
851
});
852
insts.push(Inst::Unwind {
853
inst: UnwindInst::SaveReg {
854
clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
855
reg: rt2.to_real_reg().unwrap(),
856
},
857
});
858
}
859
}
860
861
let store_vec_reg = |rd| Inst::FpuStore64 {
862
rd,
863
mem: AMode::SPPreIndexed {
864
simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
865
},
866
flags: MemFlags::trusted(),
867
};
868
let iter = clobbered_vec.chunks_exact(2);
869
870
if let [rd] = iter.remainder() {
871
let rd: Reg = rd.to_reg().into();
872
873
debug_assert_eq!(rd.class(), RegClass::Float);
874
insts.push(store_vec_reg(rd));
875
876
if flags.unwind_info() {
877
clobber_offset -= clobber_offset_change as u32;
878
insts.push(Inst::Unwind {
879
inst: UnwindInst::SaveReg {
880
clobber_offset,
881
reg: rd.to_real_reg().unwrap(),
882
},
883
});
884
}
885
}
886
887
let store_vec_reg_pair = |rt, rt2| {
888
let clobber_offset_change = 16;
889
890
(
891
Inst::FpuStoreP64 {
892
rt,
893
rt2,
894
mem: PairAMode::SPPreIndexed {
895
simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
896
},
897
flags: MemFlags::trusted(),
898
},
899
clobber_offset_change as u32,
900
)
901
};
902
let mut iter = iter.rev();
903
904
while let Some([rt, rt2]) = iter.next() {
905
let rt: Reg = rt.to_reg().into();
906
let rt2: Reg = rt2.to_reg().into();
907
908
debug_assert_eq!(rt.class(), RegClass::Float);
909
debug_assert_eq!(rt2.class(), RegClass::Float);
910
911
let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);
912
913
insts.push(inst);
914
915
if flags.unwind_info() {
916
clobber_offset -= clobber_offset_change;
917
insts.push(Inst::Unwind {
918
inst: UnwindInst::SaveReg {
919
clobber_offset,
920
reg: rt.to_real_reg().unwrap(),
921
},
922
});
923
insts.push(Inst::Unwind {
924
inst: UnwindInst::SaveReg {
925
clobber_offset: clobber_offset + clobber_offset_change / 2,
926
reg: rt2.to_real_reg().unwrap(),
927
},
928
});
929
}
930
}
931
932
// Allocate the fixed frame below the clobbers if necessary.
933
let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
934
if stack_size > 0 {
935
insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));
936
if flags.unwind_info() {
937
insts.push(Inst::Unwind {
938
inst: UnwindInst::StackAlloc { size: stack_size },
939
});
940
}
941
}
942
943
insts
944
}
945
946
fn gen_clobber_restore(
947
_call_conv: isa::CallConv,
948
_flags: &settings::Flags,
949
frame_layout: &FrameLayout,
950
) -> SmallVec<[Inst; 16]> {
951
let mut insts = SmallVec::new();
952
let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();
953
954
// Free the fixed frame if necessary.
955
let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
956
if stack_size > 0 {
957
insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
958
}
959
960
let load_vec_reg = |rd| Inst::FpuLoad64 {
961
rd,
962
mem: AMode::SPPostIndexed {
963
simm9: SImm9::maybe_from_i64(16).unwrap(),
964
},
965
flags: MemFlags::trusted(),
966
};
967
let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 {
968
rt,
969
rt2,
970
mem: PairAMode::SPPostIndexed {
971
simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
972
},
973
flags: MemFlags::trusted(),
974
};
975
976
let mut iter = clobbered_vec.chunks_exact(2);
977
978
while let Some([rt, rt2]) = iter.next() {
979
let rt: Writable<Reg> = rt.map(|r| r.into());
980
let rt2: Writable<Reg> = rt2.map(|r| r.into());
981
982
debug_assert_eq!(rt.to_reg().class(), RegClass::Float);
983
debug_assert_eq!(rt2.to_reg().class(), RegClass::Float);
984
insts.push(load_vec_reg_pair(rt, rt2));
985
}
986
987
debug_assert!(iter.remainder().len() <= 1);
988
989
if let [rd] = iter.remainder() {
990
let rd: Writable<Reg> = rd.map(|r| r.into());
991
992
debug_assert_eq!(rd.to_reg().class(), RegClass::Float);
993
insts.push(load_vec_reg(rd));
994
}
995
996
let mut iter = clobbered_int.chunks_exact(2);
997
998
while let Some([rt, rt2]) = iter.next() {
999
let rt: Writable<Reg> = rt.map(|r| r.into());
1000
let rt2: Writable<Reg> = rt2.map(|r| r.into());
1001
1002
debug_assert_eq!(rt.to_reg().class(), RegClass::Int);
1003
debug_assert_eq!(rt2.to_reg().class(), RegClass::Int);
1004
// ldp rt, rt2, [sp], #16
1005
insts.push(Inst::LoadP64 {
1006
rt,
1007
rt2,
1008
mem: PairAMode::SPPostIndexed {
1009
simm7: SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
1010
},
1011
flags: MemFlags::trusted(),
1012
});
1013
}
1014
1015
debug_assert!(iter.remainder().len() <= 1);
1016
1017
if let [rd] = iter.remainder() {
1018
let rd: Writable<Reg> = rd.map(|r| r.into());
1019
1020
debug_assert_eq!(rd.to_reg().class(), RegClass::Int);
1021
// ldr rd, [sp], #16
1022
insts.push(Inst::ULoad64 {
1023
rd,
1024
mem: AMode::SPPostIndexed {
1025
simm9: SImm9::maybe_from_i64(16).unwrap(),
1026
},
1027
flags: MemFlags::trusted(),
1028
});
1029
}
1030
1031
insts
1032
}
1033
1034
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
1035
call_conv: isa::CallConv,
1036
dst: Reg,
1037
src: Reg,
1038
size: usize,
1039
mut alloc_tmp: F,
1040
) -> SmallVec<[Self::I; 8]> {
1041
let mut insts = SmallVec::new();
1042
let arg0 = writable_xreg(0);
1043
let arg1 = writable_xreg(1);
1044
let arg2 = writable_xreg(2);
1045
let tmp = alloc_tmp(Self::word_type());
1046
insts.extend(Inst::load_constant(tmp, size as u64));
1047
insts.push(Inst::Call {
1048
info: Box::new(CallInfo {
1049
dest: ExternalName::LibCall(LibCall::Memcpy),
1050
uses: smallvec![
1051
CallArgPair {
1052
vreg: dst,
1053
preg: arg0.to_reg()
1054
},
1055
CallArgPair {
1056
vreg: src,
1057
preg: arg1.to_reg()
1058
},
1059
CallArgPair {
1060
vreg: tmp.to_reg(),
1061
preg: arg2.to_reg()
1062
}
1063
],
1064
defs: smallvec![],
1065
clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
1066
caller_conv: call_conv,
1067
callee_conv: call_conv,
1068
callee_pop_size: 0,
1069
try_call_info: None,
1070
}),
1071
});
1072
insts
1073
}
1074
1075
fn get_number_of_spillslots_for_value(
1076
rc: RegClass,
1077
vector_size: u32,
1078
_isa_flags: &Self::F,
1079
) -> u32 {
1080
assert_eq!(vector_size % 8, 0);
1081
// We allocate in terms of 8-byte slots.
1082
match rc {
1083
RegClass::Int => 1,
1084
RegClass::Float => vector_size / 8,
1085
RegClass::Vector => unreachable!(),
1086
}
1087
}
1088
1089
fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
1090
if flags.enable_pinned_reg() {
1091
static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
1092
MACHINE_ENV.get_or_init(|| create_reg_env(true))
1093
} else {
1094
static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
1095
MACHINE_ENV.get_or_init(|| create_reg_env(false))
1096
}
1097
}
1098
1099
fn get_regs_clobbered_by_call(call_conv: isa::CallConv, is_exception: bool) -> PRegSet {
1100
match call_conv {
1101
isa::CallConv::Winch => WINCH_CLOBBERS,
1102
isa::CallConv::Tail if is_exception => ALL_CLOBBERS,
1103
_ => DEFAULT_AAPCS_CLOBBERS,
1104
}
1105
}
1106
1107
fn get_ext_mode(
1108
call_conv: isa::CallConv,
1109
specified: ir::ArgumentExtension,
1110
) -> ir::ArgumentExtension {
1111
if call_conv == isa::CallConv::AppleAarch64 {
1112
specified
1113
} else {
1114
ir::ArgumentExtension::None
1115
}
1116
}
1117
1118
fn compute_frame_layout(
1119
call_conv: isa::CallConv,
1120
flags: &settings::Flags,
1121
sig: &Signature,
1122
regs: &[Writable<RealReg>],
1123
function_calls: FunctionCalls,
1124
incoming_args_size: u32,
1125
tail_args_size: u32,
1126
stackslots_size: u32,
1127
fixed_frame_storage_size: u32,
1128
outgoing_args_size: u32,
1129
) -> FrameLayout {
1130
let mut regs: Vec<Writable<RealReg>> = regs
1131
.iter()
1132
.cloned()
1133
.filter(|r| {
1134
is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg())
1135
})
1136
.collect();
1137
1138
// Sort registers for deterministic code output. We can do an unstable
1139
// sort because the registers will be unique (there are no dups).
1140
regs.sort_unstable();
1141
1142
// Compute clobber size.
1143
let clobber_size = compute_clobber_size(&regs);
1144
1145
// Compute linkage frame size.
1146
let setup_area_size = if flags.preserve_frame_pointers()
1147
|| function_calls != FunctionCalls::None
1148
// The function arguments that are passed on the stack are addressed
1149
// relative to the Frame Pointer.
1150
|| incoming_args_size > 0
1151
|| clobber_size > 0
1152
|| fixed_frame_storage_size > 0
1153
{
1154
16 // FP, LR
1155
} else {
1156
0
1157
};
1158
1159
// Return FrameLayout structure.
1160
FrameLayout {
1161
word_bytes: 8,
1162
incoming_args_size,
1163
tail_args_size,
1164
setup_area_size,
1165
clobber_size,
1166
fixed_frame_storage_size,
1167
stackslots_size,
1168
outgoing_args_size,
1169
clobbered_callee_saves: regs,
1170
function_calls,
1171
}
1172
}
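// For illustration: a leaf function with no incoming stack arguments, no
// clobbered callee-saves, and no fixed-frame storage gets
// `setup_area_size = 0` (FP/LR are not saved), unless
// `preserve_frame_pointers` is enabled, in which case the 16-byte FP/LR
// pair is always set up.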
1173
1174
fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
1175
// Use x9 as a temp if needed: it is clobbered and is not a retval.
1177
regs::writable_xreg(9)
1178
}
1179
1180
fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
1181
const PAYLOAD_REGS: &'static [Reg] = &[regs::xreg(0), regs::xreg(1)];
1182
match call_conv {
1183
isa::CallConv::SystemV | isa::CallConv::Tail => PAYLOAD_REGS,
1184
_ => &[],
1185
}
1186
}
1187
}
1188
1189
impl AArch64MachineDeps {
1190
fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
1191
// When manually unrolling, adjust the stack pointer and then write a zero
// to the stack at that offset. This generates something like
// `sub sp, sp, #1, lsl #12` followed by `stur wzr, [sp]`.
1194
//
1195
// We do this because valgrind expects us to never write beyond the stack
1196
// pointer and associated redzone.
1197
// See: https://github.com/bytecodealliance/wasmtime/issues/7454
1198
for _ in 0..probe_count {
1199
insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));
1200
1201
insts.push(Inst::gen_store(
1202
AMode::SPOffset { off: 0 },
1203
zero_reg(),
1204
I32,
1205
MemFlags::trusted(),
1206
));
1207
}
1208
1209
// Restore the stack pointer to its original value
1210
insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
1211
}
1212
1213
fn gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32) {
1214
// The non-unrolled version uses two temporary registers. The
1215
// `start` contains the current offset from sp and counts downwards
1216
// during the loop by increments of `guard_size`. The `end` is
1217
// the size of the frame and where we stop.
1218
//
1219
// Note that this emission is all post-regalloc so it should be ok
1220
// to use the temporary registers here as input/output as the loop
1221
// itself is not allowed to use the registers.
1222
let start = writable_spilltmp_reg();
1223
let end = writable_tmp2_reg();
1224
// `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse
1225
// `start` and `end` as temporaries in load_constant.
1226
insts.extend(Inst::load_constant(start, 0));
1227
insts.extend(Inst::load_constant(end, frame_size.into()));
1228
insts.push(Inst::StackProbeLoop {
1229
start,
1230
end: end.to_reg(),
1231
step: Imm12::maybe_from_u64(guard_size.into()).unwrap(),
1232
});
1233
}
1234
1235
pub fn select_api_key(
1236
isa_flags: &aarch64_settings::Flags,
1237
call_conv: isa::CallConv,
1238
setup_frame: bool,
1239
) -> Option<APIKey> {
1240
if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) {
1241
// The `tail` calling convention uses a zero modifier rather than SP
// because tail calls may happen with a different stack pointer than
// when the function was entered, meaning that SP won't be the same when
// the return address is authenticated.
1245
Some(if isa_flags.sign_return_address_with_bkey() {
1246
match call_conv {
1247
isa::CallConv::Tail => APIKey::BZ,
1248
_ => APIKey::BSP,
1249
}
1250
} else {
1251
match call_conv {
1252
isa::CallConv::Tail => APIKey::AZ,
1253
_ => APIKey::ASP,
1254
}
1255
})
1256
} else {
1257
None
1258
}
1259
}
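// For illustration: with `sign_return_address` enabled and
// `sign_return_address_with_bkey` set, a frame-setting function under a
// non-`tail` calling convention selects `APIKey::BSP` (the B key with SP
// as the modifier), while the `tail` convention selects the zero-modifier
// `APIKey::BZ` variant instead.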
1260
}
1261
1262
/// Is the given register saved in the prologue if clobbered, i.e., is it a
1263
/// callee-save?
1264
fn is_reg_saved_in_prologue(
1265
_call_conv: isa::CallConv,
1266
enable_pinned_reg: bool,
1267
sig: &Signature,
1268
r: RealReg,
1269
) -> bool {
1270
// FIXME: We need to inspect whether a function is returning Z or P regs too.
1271
let save_z_regs = sig
1272
.params
1273
.iter()
1274
.filter(|p| p.value_type.is_dynamic_vector())
1275
.count()
1276
!= 0;
1277
1278
match r.class() {
1279
RegClass::Int => {
1280
// x19 - x28 inclusive are callee-saves.
1281
// However, x21 is the pinned reg if `enable_pinned_reg`
1282
// is set, and is implicitly globally-allocated, hence not
1283
// callee-saved in prologues.
1284
if enable_pinned_reg && r.hw_enc() == PINNED_REG {
1285
false
1286
} else {
1287
r.hw_enc() >= 19 && r.hw_enc() <= 28
1288
}
1289
}
1290
RegClass::Float => {
1291
// If a subroutine takes at least one argument in scalable vector registers
1292
// or scalable predicate registers, or if it is a function that returns
1293
// results in such registers, it must ensure that the entire contents of
1294
// z8-z23 are preserved across the call. In other cases it need only
1295
// preserve the low 64 bits of z8-z15.
1296
if save_z_regs {
1297
r.hw_enc() >= 8 && r.hw_enc() <= 23
1298
} else {
1299
// v8 - v15 inclusive are callee-saves.
1300
r.hw_enc() >= 8 && r.hw_enc() <= 15
1301
}
1302
}
1303
RegClass::Vector => unreachable!(),
1304
}
1305
}
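// For illustration: x20 is reported as callee-saved, x21 is not when the
// pinned register is enabled, and v9 is callee-saved; when the signature
// involves dynamic (scalable) vector types, v16-v23 are additionally
// treated as callee-saved.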
1306
1307
const fn default_aapcs_clobbers() -> PRegSet {
1308
PRegSet::empty()
1309
// x0 - x17 inclusive are caller-saves.
1310
.with(xreg_preg(0))
1311
.with(xreg_preg(1))
1312
.with(xreg_preg(2))
1313
.with(xreg_preg(3))
1314
.with(xreg_preg(4))
1315
.with(xreg_preg(5))
1316
.with(xreg_preg(6))
1317
.with(xreg_preg(7))
1318
.with(xreg_preg(8))
1319
.with(xreg_preg(9))
1320
.with(xreg_preg(10))
1321
.with(xreg_preg(11))
1322
.with(xreg_preg(12))
1323
.with(xreg_preg(13))
1324
.with(xreg_preg(14))
1325
.with(xreg_preg(15))
1326
.with(xreg_preg(16))
1327
.with(xreg_preg(17))
1328
// v0 - v7 inclusive and v16 - v31 inclusive are
1329
// caller-saves. The upper 64 bits of v8 - v15 inclusive are
1330
// also caller-saves. However, because we cannot currently
1331
// represent partial registers to regalloc2, we indicate here
1332
// that every vector register is caller-save. Because this
1333
// function is used at *callsites*, approximating in this
1334
// direction (save more than necessary) is conservative and
1335
// thus safe.
1336
//
1337
// Note that we exclude clobbers from a call instruction when
1338
// a call instruction's callee has the same ABI as the caller
1339
// (the current function body); this is safe (anything
1340
// clobbered by callee can be clobbered by caller as well) and
1341
// avoids unnecessary saves of v8-v15 in the prologue even
1342
// though we include them as defs here.
1343
.with(vreg_preg(0))
1344
.with(vreg_preg(1))
1345
.with(vreg_preg(2))
1346
.with(vreg_preg(3))
1347
.with(vreg_preg(4))
1348
.with(vreg_preg(5))
1349
.with(vreg_preg(6))
1350
.with(vreg_preg(7))
1351
.with(vreg_preg(8))
1352
.with(vreg_preg(9))
1353
.with(vreg_preg(10))
1354
.with(vreg_preg(11))
1355
.with(vreg_preg(12))
1356
.with(vreg_preg(13))
1357
.with(vreg_preg(14))
1358
.with(vreg_preg(15))
1359
.with(vreg_preg(16))
1360
.with(vreg_preg(17))
1361
.with(vreg_preg(18))
1362
.with(vreg_preg(19))
1363
.with(vreg_preg(20))
1364
.with(vreg_preg(21))
1365
.with(vreg_preg(22))
1366
.with(vreg_preg(23))
1367
.with(vreg_preg(24))
1368
.with(vreg_preg(25))
1369
.with(vreg_preg(26))
1370
.with(vreg_preg(27))
1371
.with(vreg_preg(28))
1372
.with(vreg_preg(29))
1373
.with(vreg_preg(30))
1374
.with(vreg_preg(31))
1375
}
1376
1377
const fn winch_clobbers() -> PRegSet {
1378
PRegSet::empty()
1379
.with(xreg_preg(0))
1380
.with(xreg_preg(1))
1381
.with(xreg_preg(2))
1382
.with(xreg_preg(3))
1383
.with(xreg_preg(4))
1384
.with(xreg_preg(5))
1385
.with(xreg_preg(6))
1386
.with(xreg_preg(7))
1387
.with(xreg_preg(8))
1388
.with(xreg_preg(9))
1389
.with(xreg_preg(10))
1390
.with(xreg_preg(11))
1391
.with(xreg_preg(12))
1392
.with(xreg_preg(13))
1393
.with(xreg_preg(14))
1394
.with(xreg_preg(15))
1395
.with(xreg_preg(16))
1396
.with(xreg_preg(17))
1397
// x18 is used to carry platform state and is not allocatable by Winch.
1398
//
1399
// x19 - x27 are considered caller-saved in Winch's calling convention.
1400
.with(xreg_preg(19))
1401
.with(xreg_preg(20))
1402
.with(xreg_preg(21))
1403
.with(xreg_preg(22))
1404
.with(xreg_preg(23))
1405
.with(xreg_preg(24))
1406
.with(xreg_preg(25))
1407
.with(xreg_preg(26))
1408
.with(xreg_preg(27))
1409
// x28 is used as the shadow stack pointer and is considered
1410
// callee-saved.
1411
//
1412
// All vregs are considered caller-saved.
1413
.with(vreg_preg(0))
1414
.with(vreg_preg(1))
1415
.with(vreg_preg(2))
1416
.with(vreg_preg(3))
1417
.with(vreg_preg(4))
1418
.with(vreg_preg(5))
1419
.with(vreg_preg(6))
1420
.with(vreg_preg(7))
1421
.with(vreg_preg(8))
1422
.with(vreg_preg(9))
1423
.with(vreg_preg(10))
1424
.with(vreg_preg(11))
1425
.with(vreg_preg(12))
1426
.with(vreg_preg(13))
1427
.with(vreg_preg(14))
1428
.with(vreg_preg(15))
1429
.with(vreg_preg(16))
1430
.with(vreg_preg(17))
1431
.with(vreg_preg(18))
1432
.with(vreg_preg(19))
1433
.with(vreg_preg(20))
1434
.with(vreg_preg(21))
1435
.with(vreg_preg(22))
1436
.with(vreg_preg(23))
1437
.with(vreg_preg(24))
1438
.with(vreg_preg(25))
1439
.with(vreg_preg(26))
1440
.with(vreg_preg(27))
1441
.with(vreg_preg(28))
1442
.with(vreg_preg(29))
1443
.with(vreg_preg(30))
1444
.with(vreg_preg(31))
1445
}
1446
1447
const fn all_clobbers() -> PRegSet {
1448
PRegSet::empty()
1449
// integer registers: x0 to x28 inclusive. (x29 is FP, x30 is
1450
// LR, x31 is SP/ZR.)
1451
.with(xreg_preg(0))
1452
.with(xreg_preg(1))
1453
.with(xreg_preg(2))
1454
.with(xreg_preg(3))
1455
.with(xreg_preg(4))
1456
.with(xreg_preg(5))
1457
.with(xreg_preg(6))
1458
.with(xreg_preg(7))
1459
.with(xreg_preg(8))
1460
.with(xreg_preg(9))
1461
.with(xreg_preg(10))
1462
.with(xreg_preg(11))
1463
.with(xreg_preg(12))
1464
.with(xreg_preg(13))
1465
.with(xreg_preg(14))
1466
.with(xreg_preg(15))
1467
.with(xreg_preg(16))
1468
.with(xreg_preg(17))
1469
.with(xreg_preg(18))
1470
.with(xreg_preg(19))
1471
.with(xreg_preg(20))
1472
.with(xreg_preg(21))
1473
.with(xreg_preg(22))
1474
.with(xreg_preg(23))
1475
.with(xreg_preg(24))
1476
.with(xreg_preg(25))
1477
.with(xreg_preg(26))
1478
.with(xreg_preg(27))
1479
.with(xreg_preg(28))
1480
// vector registers: v0 to v31 inclusive.
1481
.with(vreg_preg(0))
1482
.with(vreg_preg(1))
1483
.with(vreg_preg(2))
1484
.with(vreg_preg(3))
1485
.with(vreg_preg(4))
1486
.with(vreg_preg(5))
1487
.with(vreg_preg(6))
1488
.with(vreg_preg(7))
1489
.with(vreg_preg(8))
1490
.with(vreg_preg(9))
1491
.with(vreg_preg(10))
1492
.with(vreg_preg(11))
1493
.with(vreg_preg(12))
1494
.with(vreg_preg(13))
1495
.with(vreg_preg(14))
1496
.with(vreg_preg(15))
1497
.with(vreg_preg(16))
1498
.with(vreg_preg(17))
1499
.with(vreg_preg(18))
1500
.with(vreg_preg(19))
1501
.with(vreg_preg(20))
1502
.with(vreg_preg(21))
1503
.with(vreg_preg(22))
1504
.with(vreg_preg(23))
1505
.with(vreg_preg(24))
1506
.with(vreg_preg(25))
1507
.with(vreg_preg(26))
1508
.with(vreg_preg(27))
1509
.with(vreg_preg(28))
1510
.with(vreg_preg(29))
1511
.with(vreg_preg(30))
1512
.with(vreg_preg(31))
1513
}
1514
1515
const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers();
1516
const WINCH_CLOBBERS: PRegSet = winch_clobbers();
1517
const ALL_CLOBBERS: PRegSet = all_clobbers();
1518
1519
fn create_reg_env(enable_pinned_reg: bool) -> MachineEnv {
1520
fn preg(r: Reg) -> PReg {
1521
r.to_real_reg().unwrap().into()
1522
}
1523
1524
let mut env = MachineEnv {
1525
preferred_regs_by_class: [
1526
vec![
1527
preg(xreg(0)),
1528
preg(xreg(1)),
1529
preg(xreg(2)),
1530
preg(xreg(3)),
1531
preg(xreg(4)),
1532
preg(xreg(5)),
1533
preg(xreg(6)),
1534
preg(xreg(7)),
1535
preg(xreg(8)),
1536
preg(xreg(9)),
1537
preg(xreg(10)),
1538
preg(xreg(11)),
1539
preg(xreg(12)),
1540
preg(xreg(13)),
1541
preg(xreg(14)),
1542
preg(xreg(15)),
1543
// x16 and x17 are spilltmp and tmp2 (see above).
1544
// x18 could be used by the platform to carry inter-procedural state;
1545
// conservatively assume so and make it not allocatable.
1546
// x19-28 are callee-saved and so not preferred.
1547
// x21 is the pinned register (if enabled) and not allocatable if so.
1548
// x29 is FP, x30 is LR, x31 is SP/ZR.
1549
],
1550
vec![
1551
preg(vreg(0)),
1552
preg(vreg(1)),
1553
preg(vreg(2)),
1554
preg(vreg(3)),
1555
preg(vreg(4)),
1556
preg(vreg(5)),
1557
preg(vreg(6)),
1558
preg(vreg(7)),
1559
// v8-15 are callee-saved and so not preferred.
1560
preg(vreg(16)),
1561
preg(vreg(17)),
1562
preg(vreg(18)),
1563
preg(vreg(19)),
1564
preg(vreg(20)),
1565
preg(vreg(21)),
1566
preg(vreg(22)),
1567
preg(vreg(23)),
1568
preg(vreg(24)),
1569
preg(vreg(25)),
1570
preg(vreg(26)),
1571
preg(vreg(27)),
1572
preg(vreg(28)),
1573
preg(vreg(29)),
1574
preg(vreg(30)),
1575
preg(vreg(31)),
1576
],
1577
// Vector Regclass is unused
1578
vec![],
1579
],
1580
non_preferred_regs_by_class: [
1581
vec![
1582
preg(xreg(19)),
1583
preg(xreg(20)),
1584
// x21 is pinned reg if enabled; we add to this list below if not.
1585
preg(xreg(22)),
1586
preg(xreg(23)),
1587
preg(xreg(24)),
1588
preg(xreg(25)),
1589
preg(xreg(26)),
1590
preg(xreg(27)),
1591
preg(xreg(28)),
1592
],
1593
vec![
1594
preg(vreg(8)),
1595
preg(vreg(9)),
1596
preg(vreg(10)),
1597
preg(vreg(11)),
1598
preg(vreg(12)),
1599
preg(vreg(13)),
1600
preg(vreg(14)),
1601
preg(vreg(15)),
1602
],
1603
// Vector Regclass is unused
1604
vec![],
1605
],
1606
fixed_stack_slots: vec![],
1607
scratch_by_class: [None, None, None],
1608
};
1609
1610
if !enable_pinned_reg {
1611
debug_assert_eq!(PINNED_REG, 21); // We assumed this above in hardcoded reg list.
1612
env.non_preferred_regs_by_class[0].push(preg(xreg(PINNED_REG)));
1613
}
1614
1615
env
1616
}
1617
1618