GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/isa/x64/abi.rs
//! Implementation of the standard x64 ABI.

use crate::CodegenResult;
use crate::ir::{self, LibCall, MemFlags, Signature, TrapCode, types};
use crate::ir::{ExternalName, types::*};
use crate::isa;
use crate::isa::winch;
use crate::isa::{CallConv, unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings};
use crate::machinst::abi::*;
use crate::machinst::*;
use crate::settings;
use alloc::borrow::ToOwned;
use alloc::boxed::Box;
use alloc::vec::Vec;
use args::*;
use cranelift_assembler_x64 as asm;
use regalloc2::{MachineEnv, PReg, PRegSet};
use smallvec::{SmallVec, smallvec};
use std::sync::OnceLock;

/// Support for the x64 ABI from the callee side (within a function body).
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;

/// Implementation of ABI primitives for x64.
pub struct X64ABIMachineSpec;

impl X64ABIMachineSpec {
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
        insts.reserve(probe_count as usize);
        for _ in 0..probe_count {
            // "Allocate" stack space for the probe by decrementing the stack pointer before
            // the write. This is required to make valgrind happy.
            // See: https://github.com/bytecodealliance/wasmtime/issues/7454
            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

            // Touch the current page by storing an immediate zero.
            // mov [rsp], 0
            insts.push(Inst::External {
                inst: asm::inst::movl_mi::new(Amode::imm_reg(0, regs::rsp()), 0i32.cast_unsigned())
                    .into(),
            });
        }

        // Restore the stack pointer to its original value
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }
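    // Rough illustration of the unrolled sequence produced above (assuming a
    // guard_size of 4096 and probe_count of 2; the exact encoding is chosen by
    // the assembler):
    //   sub  rsp, 4096
    //   mov  dword ptr [rsp], 0
    //   sub  rsp, 4096
    //   mov  dword ptr [rsp], 0
    //   add  rsp, 8192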

    fn gen_probestack_loop(
        insts: &mut SmallInstVec<Inst>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // We have to use a caller-saved register since clobbering only
        // happens after stack probing.
        // `r11` is caller saved on both Fastcall and SystemV, and not used
        // for argument passing, so it's pretty much free. It is also not
        // used by the stacklimit mechanism.
        let tmp = regs::r11();
        debug_assert!({
            let real_reg = tmp.to_real_reg().unwrap();
            !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
        });

        insts.push(Inst::StackProbeLoop {
            tmp: Writable::from_reg(tmp),
            frame_size,
            guard_size,
        });
    }
}

impl IsaFlags for x64_settings::Flags {}

impl ABIMachineSpec for X64ABIMachineSpec {
    type I = Inst;

    type F = x64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

    fn word_bits() -> u32 {
        64
    }

    /// Return required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_fastcall = call_conv == CallConv::WindowsFastcall;
        let is_tail = call_conv == CallConv::Tail;

        let mut next_gpr = 0;
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;
        let mut next_param_idx = 0; // Fastcall cares about overall param index

        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            // Fastcall always reserves 32 bytes of shadow space corresponding to
            // the four initial in-arg parameters.
            //
            // (See:
            // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
            next_stack = 32;
        }

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            next_gpr += 1;
            next_param_idx += 1;
            // In the SystemV and WindowsFastcall ABIs, the return area pointer is the first
            // argument. For the Tail and Winch ABIs we do the same for simplicity's sake.
            Some(ABIArg::reg(
                get_intreg_for_arg(call_conv, 0, 0)
                    .unwrap()
                    .to_real_reg()
                    .unwrap(),
                types::I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            ))
        } else {
            None
        };

        // If any param uses extension, the winch calling convention will not pack its results
        // on the stack and will instead align them to 8-byte boundaries the same way that all the
        // other calling conventions do. This isn't consistent with Winch itself, but is fine as
        // Winch only uses this calling convention via trampolines, and those trampolines don't add
        // extension annotations. Additionally, handling extension attributes this way allows clif
        // functions that use them with the Winch calling convention to interact successfully with
        // testing infrastructure.
        // The results are also not packed if any of the types are `f16`. This is to simplify the
        // implementation of `Inst::load`/`Inst::store` (which would otherwise require multiple
        // instructions), and doesn't affect Winch itself as Winch doesn't support `f16` at all.
        let uses_extension = params.iter().any(|p| {
            p.extension != ir::ArgumentExtension::None
                || p.value_type == types::F16
                || p.value_type == types::I8X2
        });

        for (ix, param) in params.iter().enumerate() {
            let last_param = ix == params.len() - 1;

            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                let offset = next_stack as i64;
                let size = size;
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                next_stack += size;
                args.push(ABIArg::StructArg {
                    offset,
                    size: size as u64,
                    purpose: param.purpose,
                });
                continue;
            }

            // Find regclass(es) of the register(s) used to store a value of this type.
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;

            // Now assign ABIArgSlots for each register-sized part.
            //
            // Note that the handling of `i128` values is unique here:
            //
            // - If `enable_llvm_abi_extensions` is set in the flags, each
            //   `i128` is split into two `i64`s and assigned exactly as if it
            //   were two consecutive 64-bit args, except that if one of the
            //   two halves is forced onto the stack, the other half is too.
            //   This is consistent with LLVM's behavior, and is needed for
            //   some uses of Cranelift (e.g., the rustc backend).
            //
            // - Otherwise, if the calling convention is Tail, we behave as in
            //   the previous case, even if `enable_llvm_abi_extensions` is not
            //   set in the flags: This is a custom calling convention defined
            //   by Cranelift, LLVM doesn't know about it.
            //
            // - Otherwise, both SysV and Fastcall specify behavior (use of
            //   vector register, a register pair, or passing by reference
            //   depending on the case), but for simplicity, we will just panic if
            //   an i128 type appears in a signature and the LLVM extensions flag
            //   is not set.
            //
            // For examples of how rustc compiles i128 args and return values on
            // both SysV and Fastcall platforms, see:
            // https://godbolt.org/z/PhG3ob

            if param.value_type.bits() > 64
                && !(param.value_type.is_vector() || param.value_type.is_float())
                && !flags.enable_llvm_abi_extensions()
                && !is_tail
            {
                panic!(
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
                );
            }
            // As MSVC doesn't support f16/f128 there is no standard way to pass/return them with
            // the Windows ABI. LLVM passes/returns them in XMM registers.
            if matches!(param.value_type, types::F16 | types::F128)
                && is_fastcall
                && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f16/f128 args/return values not supported for windows_fastcall unless LLVM ABI extensions are enabled"
                );
            }

            // Windows fastcall dictates that `__m128i` and `f128` parameters to
            // a function are passed indirectly as pointers, so handle that as a
            // special case before the loop below.
            if (param.value_type.is_vector() || param.value_type.is_float())
                && param.value_type.bits() >= 128
                && args_or_rets == ArgsOrRets::Args
                && is_fastcall
            {
                let pointer = match get_intreg_for_arg(call_conv, next_gpr, next_param_idx) {
                    Some(reg) => {
                        next_gpr += 1;
                        ABIArgSlot::Reg {
                            reg: reg.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        }
                    }

                    None => {
                        next_stack = align_to(next_stack, 8) + 8;
                        ABIArgSlot::Stack {
                            offset: (next_stack - 8) as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        }
                    }
                };
                next_param_idx += 1;
                args.push(ABIArg::ImplicitPtrArg {
                    // NB: this is filled in after this loop
                    offset: 0,
                    pointer,
                    ty: param.value_type,
                    purpose: param.purpose,
                });
                continue;
            }

            // SystemV dictates that 128-bit int parameters are always either
            // passed in two registers or on the stack, so handle that as a
            // special case before the loop below.
            if param.value_type == types::I128
                && args_or_rets == ArgsOrRets::Args
                && call_conv == CallConv::SystemV
            {
                let mut slots = ABIArgSlotVec::new();
                match (
                    get_intreg_for_arg(CallConv::SystemV, next_gpr, next_param_idx),
                    get_intreg_for_arg(CallConv::SystemV, next_gpr + 1, next_param_idx + 1),
                ) {
                    (Some(reg1), Some(reg2)) => {
                        slots.push(ABIArgSlot::Reg {
                            reg: reg1.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                        slots.push(ABIArgSlot::Reg {
                            reg: reg2.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                    }
                    _ => {
                        let size = 16;

                        // Align.
                        next_stack = align_to(next_stack, size);

                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64 + 8,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        next_stack += size;
                    }
                };
                // Unconditionally increment next_gpr even when storing the
                // argument on the stack to prevent reusing a possibly
                // remaining register for the next argument.
                next_gpr += 2;
                next_param_idx += 2;

                args.push(ABIArg::Slots {
                    slots,
                    purpose: param.purpose,
                });
                continue;
            }

            let mut slots = ABIArgSlotVec::new();
            for (ix, (rc, reg_ty)) in rcs.iter().zip(reg_tys.iter()).enumerate() {
                let last_slot = last_param && ix == rcs.len() - 1;

                let intreg = *rc == RegClass::Int;
                let nextreg = if intreg {
                    match args_or_rets {
                        ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr, next_param_idx),
                        ArgsOrRets::Rets => {
                            get_intreg_for_retval(call_conv, flags, next_gpr, last_slot)
                        }
                    }
                } else {
                    match args_or_rets {
                        ArgsOrRets::Args => {
                            get_fltreg_for_arg(call_conv, next_vreg, next_param_idx)
                        }
                        ArgsOrRets::Rets => get_fltreg_for_retval(call_conv, next_vreg, last_slot),
                    }
                };
                next_param_idx += 1;
                if let Some(reg) = nextreg {
                    if intreg {
                        next_gpr += 1;
                    } else {
                        next_vreg += 1;
                    }
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                        return Err(crate::CodegenError::Unsupported(
                            "Too many return values to fit in registers. \
                            Use a StructReturn argument instead. (#9510)"
                                .to_owned(),
                        ));
                    }

                    let size = reg_ty.bytes();
                    let size = if call_conv == CallConv::Winch
                        && args_or_rets == ArgsOrRets::Rets
                        && !uses_extension
                    {
                        size
                    } else {
                        let size = core::cmp::max(size, 8);

                        // Align.
                        debug_assert!(size.is_power_of_two());
                        next_stack = align_to(next_stack, size);
                        size
                    };

                    slots.push(ABIArgSlot::Stack {
                        offset: next_stack as i64,
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                    next_stack += size;
                }
            }

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });
        }

        // Fastcall's indirect 128+ bit vector arguments are all located on the
        // stack, and stack space is reserved after all parameters are passed,
        // so allocate from the space now.
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            for arg in args.args_mut() {
                if let ABIArg::ImplicitPtrArg { offset, .. } = arg {
                    assert_eq!(*offset, 0);
                    next_stack = align_to(next_stack, 16);
                    *offset = next_stack as i64;
                    next_stack += 16;
                }
            }
        }
        let extra_arg_idx = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        // Winch writes the first result to the highest offset, so we need to iterate through the
        // args and adjust the offsets down.
        if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
            winch::reverse_stack(args, next_stack, uses_extension);
        }

        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg_idx))
    }
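    // Informal example of the assignment above: under SystemV, a signature
    // `(i64, f64, i64) -> i64` places the first i64 in %rdi, the f64 in %xmm0,
    // and the second i64 in %rsi, since the integer and float register counters
    // advance independently. Once the six integer or eight float argument
    // registers are exhausted, further values go to 8-byte-aligned stack slots,
    // and `next_stack` is rounded up to 16 at the end.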

    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
        // bits as well -- see `Inst::store()`).
        let ty = match ty {
            types::I8 | types::I16 | types::I32 => types::I64,
            // Stack slots are always at least 8 bytes, so it's fine to load 4 bytes instead of only
            // two.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        Inst::store(ty, from_reg, mem)
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    /// Generate an integer-extend operation.
    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        is_signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I {
        let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
            .unwrap_or_else(|| panic!("invalid extension: {from_bits} -> {to_bits}"));
        if is_signed {
            Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        } else {
            Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        }
    }

    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Self::I> {
        let mut ret = SmallVec::new();
        if from_reg != into_reg.to_reg() {
            ret.push(Inst::gen_move(into_reg, from_reg, I64));
        }
        let imm = i32::try_from(imm).expect("`imm` is too large to fit in a 32-bit immediate");
        ret.push(Inst::addq_mi(into_reg, imm));
        ret
    }

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
        smallvec![
            Inst::External {
                inst: asm::inst::cmpq_rm::new(Gpr::unwrap_new(limit_reg), Gpr::RSP,).into(),
            },
            Inst::TrapIf {
                // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
                cc: CC::NBE,
                trap_code: TrapCode::STACK_OVERFLOW,
            },
        ]
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I {
        let mem: SyntheticAmode = mem.into();
        Inst::External {
            inst: asm::inst::leaq_rm::new(into_reg, mem).into(),
        }
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        // As per comment on trait definition, we must return a caller-save
        // register that is not used as an argument here.
        debug_assert!(!is_callee_save_systemv(
            regs::r10().to_real_reg().unwrap(),
            false
        ));
        regs::r10()
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
        // Only ever used for I64s, F128s and vectors; if that changes, see if
        // the ExtKind below needs to be changed.
        assert!(ty == I64 || ty.is_vector() || ty == F128);
        let mem = Amode::imm_reg(offset, base);
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        let mem = Amode::imm_reg(offset, base);
        Inst::store(ty, from_reg, mem)
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
        let rsp = Writable::from_reg(regs::rsp());
        let inst = if amount >= 0 {
            Inst::addq_mi(rsp, amount)
        } else {
            Inst::subq_mi(rsp, -amount)
        };
        smallvec![inst]
    }
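    // For example, `gen_sp_reg_adjust(-32)` above yields `sub rsp, 32` and
    // `gen_sp_reg_adjust(16)` yields `add rsp, 16`.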

    fn gen_prologue_frame_setup(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let r_rsp = Gpr::RSP;
        let r_rbp = Gpr::RBP;
        let w_rbp = Writable::from_reg(r_rbp);
        let mut insts = SmallVec::new();
        // `push %rbp`
        // RSP before the call will be 0 % 16. So here, it is 8 % 16.
        insts.push(Inst::External {
            inst: asm::inst::pushq_o::new(r_rbp).into(),
        });

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // `mov %rsp, %rbp`
        // RSP is now 0 % 16
        insts.push(Inst::External {
            inst: asm::inst::movq_mr::new(w_rbp, r_rsp).into(),
        });

        insts
    }

    fn gen_epilogue_frame_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        _frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let rbp = Gpr::RBP;
        let rsp = Gpr::RSP;

        let mut insts = SmallVec::new();
        // `mov %rbp, %rsp`
        insts.push(Inst::External {
            inst: asm::inst::movq_mr::new(Writable::from_reg(rsp), rbp).into(),
        });
        // `pop %rbp`
        insts.push(Inst::External {
            inst: asm::inst::popq_o::new(Writable::from_reg(rbp)).into(),
        });
        insts
    }

    fn gen_return(
        call_conv: CallConv,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        // Emit return instruction.
        let stack_bytes_to_pop = if call_conv == CallConv::Tail {
            frame_layout.tail_args_size
        } else {
            0
        };
        let inst = if stack_bytes_to_pop == 0 {
            asm::inst::retq_zo::new().into()
        } else {
            let stack_bytes_to_pop = u16::try_from(stack_bytes_to_pop).unwrap();
            asm::inst::retq_i::new(stack_bytes_to_pop).into()
        };
        smallvec![Inst::External { inst }]
    }
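    // For instance, under the tail calling convention with
    // `frame_layout.tail_args_size == 32` this emits `ret 32`, so the callee
    // pops its incoming stack arguments; every other convention emits a plain
    // `ret`.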

    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.push(Inst::imm(
            OperandSize::Size32,
            frame_size as u64,
            Writable::from_reg(regs::rax()),
        ));
        insts.push(Inst::CallKnown {
            // No need to include arg here: we are post-regalloc
            // so no constraints will be seen anyway.
            info: Box::new(CallInfo::empty(
                ExternalName::LibCall(LibCall::Probestack),
                CallConv::Probestack,
            )),
        });
    }

    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // Unroll at most n consecutive probes, before falling back to using a loop
        //
        // This number was picked because the loop version is 38 bytes long. We can fit
        // 4 inline probes in that space, so unroll if it's beneficial in terms of code size.
        const PROBE_MAX_UNROLL: u32 = 4;

        // Calculate how many probes we need to perform. Round down, as we only
        // need to probe whole guard_size regions we'd otherwise skip over.
        let probe_count = frame_size / guard_size;
        if probe_count == 0 {
            // No probe necessary
        } else if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
        }
    }
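    // Example: with a 4 KiB guard and a 10000-byte frame, probe_count is 2
    // (10000 / 4096), which is within PROBE_MAX_UNROLL, so the probes are
    // emitted inline rather than as a loop.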

    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // When a return_call within this function required more stack arguments than we have
        // present, resize the incoming argument area of the frame to accommodate those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement the stack pointer to make space for the new arguments.
            let rsp = Writable::from_reg(regs::rsp());
            insts.push(Inst::subq_mi(
                rsp,
                i32::try_from(incoming_args_diff)
                    .expect("`incoming_args_diff` is too large to fit in a 32-bit immediate"),
            ));

            // Make sure to keep the frame pointer and stack pointer in sync at
            // this point.
            let rbp = Gpr::RBP;
            let rsp = Gpr::RSP;
            insts.push(Inst::External {
                inst: asm::inst::movq_mr::new(Writable::from_reg(rbp), rsp).into(),
            });

            let incoming_args_diff = i32::try_from(incoming_args_diff).unwrap();

            // Move the saved frame pointer down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff, regs::rsp());
            let r11 = Writable::from_reg(Gpr::R11);
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(0, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });

            // Move the saved return address down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff + 8, regs::rsp());
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(8, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });
        }

        // We need to factor `incoming_args_diff` into the offset upward here, as we have grown
        // the argument area -- `setup_area_size` alone will not be the correct offset up to the
        // original caller's SP.
        let offset_upward_to_caller_sp = frame_layout.setup_area_size + incoming_args_diff;
        if flags.unwind_info() && offset_upward_to_caller_sp > 0 {
            // Emit unwind info: start the frame. The frame (from unwind
            // consumers' point of view) starts at clobbers, just below
            // the FP and return address. Spill slots and stack slots are
            // part of our actual frame but do not concern the unwinder.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp,
                },
            });
        }

        // Adjust the stack pointer downward for clobbers and the function fixed
        // frame (spillslots, storage slots, and argument area).
        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::subq_mi(rsp, stack_size));
        }

        // Store each clobbered register in order at offsets from RSP,
        // placing them above the fixed frame slots.
        let clobber_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        let mut cur_offset = 0;
        for reg in &frame_layout.clobbered_callee_saves {
            let r_reg = reg.to_reg();
            let ty = match r_reg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());
            let off = cur_offset;
            cur_offset += ty.bytes();

            insts.push(Inst::store(
                ty,
                r_reg.into(),
                Amode::imm_reg(i32::try_from(off + clobber_offset).unwrap(), regs::rsp()),
            ));

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: off,
                        reg: r_reg,
                    },
                });
            }
        }

        insts
    }

    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // Restore regs by loading from offsets of RSP. We compute the offset from
        // the same base as above in clobber_save, as RSP won't change between the
        // prologue and epilogue.
        let mut cur_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        for reg in &frame_layout.clobbered_callee_saves {
            let rreg = reg.to_reg();
            let ty = match rreg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());

            insts.push(Inst::load(
                ty,
                Amode::imm_reg(cur_offset.try_into().unwrap(), regs::rsp()),
                Writable::from_reg(rreg.into()),
                ExtKind::None,
            ));

            cur_offset += ty.bytes();
        }

        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;

        // Adjust RSP back upward.
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::addq_mi(rsp, stack_size));
        }

        insts
    }

    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        let arg0 = get_intreg_for_arg(call_conv, 0, 0).unwrap();
        let arg1 = get_intreg_for_arg(call_conv, 1, 1).unwrap();
        let arg2 = get_intreg_for_arg(call_conv, 2, 2).unwrap();
        let temp = alloc_tmp(Self::word_type());
        let temp2 = alloc_tmp(Self::word_type());
        insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
        // We use an indirect call and a full LoadExtName because we do not have
        // information about the libcall `RelocDistance` here, so we
        // conservatively use the more flexible calling sequence.
        insts.push(Inst::LoadExtName {
            dst: temp2.map(Gpr::unwrap_new),
            name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
            offset: 0,
            distance: RelocDistance::Far,
        });
        let callee_pop_size = 0;
        insts.push(Inst::call_unknown(Box::new(CallInfo {
            dest: RegMem::reg(temp2.to_reg()),
            uses: smallvec![
                CallArgPair {
                    vreg: dst,
                    preg: arg0
                },
                CallArgPair {
                    vreg: src,
                    preg: arg1
                },
                CallArgPair {
                    vreg: temp.to_reg(),
                    preg: arg2
                },
            ],
            defs: smallvec![],
            clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
            callee_pop_size,
            callee_conv: call_conv,
            caller_conv: call_conv,
            try_call_info: None,
            patchable: false,
        })));
        insts
    }

    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        vector_scale: u32,
        _isa_flags: &Self::F,
    ) -> u32 {
        // We allocate in terms of 8-byte slots.
        match rc {
            RegClass::Int => 1,
            RegClass::Float => vector_scale / 8,
            RegClass::Vector => unreachable!(),
        }
    }
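    // E.g. a float-class value with `vector_scale == 16` (128-bit vectors)
    // occupies two 8-byte spillslots.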

    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        if flags.enable_pinned_reg() {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(true))
        } else {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(false))
        }
    }

    fn get_regs_clobbered_by_call(
        call_conv_of_callee: isa::CallConv,
        is_exception: bool,
    ) -> PRegSet {
        match (call_conv_of_callee, is_exception) {
            (isa::CallConv::Tail, true) => ALL_CLOBBERS,
            // Note that "PreserveAll" actually preserves nothing at
            // the callsite if used for a `try_call`, because the
            // unwinder ABI for `try_call`s is still "no clobbered
            // register restores" for this ABI (so as to work with
            // Wasmtime).
            (isa::CallConv::PreserveAll, true) => ALL_CLOBBERS,
            (isa::CallConv::Winch, _) => ALL_CLOBBERS,
            (isa::CallConv::SystemV, _) => SYSV_CLOBBERS,
            (isa::CallConv::WindowsFastcall, false) => WINDOWS_CLOBBERS,
            (isa::CallConv::PreserveAll, _) => NO_CLOBBERS,
            (_, false) => SYSV_CLOBBERS,
            (call_conv, true) => panic!("unimplemented clobbers for exn abi of {call_conv:?}"),
        }
    }

    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }

    fn compute_frame_layout(
        call_conv: CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        function_calls: FunctionCalls,
        incoming_args_size: u32,
        tail_args_size: u32,
        stackslots_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        debug_assert!(tail_args_size >= incoming_args_size);

        let mut regs: Vec<Writable<RealReg>> = match call_conv {
            // The `winch` calling convention doesn't have any callee-save
            // registers.
            CallConv::Winch => vec![],
            CallConv::Fast | CallConv::SystemV | CallConv::Tail => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::WindowsFastcall => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            // The `preserve_all` calling convention makes every reg a callee-save reg.
            CallConv::PreserveAll => regs.iter().cloned().collect(),
            CallConv::Probestack => todo!("probestack?"),
            CallConv::AppleAarch64 => unreachable!(),
        };
        // Sort registers for deterministic code output. We can do an unstable sort because the
        // registers will be unique (there are no dups).
        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute setup area size.
        let setup_area_size = 16; // RBP, return address

        // Return FrameLayout structure.
        FrameLayout {
            word_bytes: 8,
            incoming_args_size,
            tail_args_size: align_to(tail_args_size, 16),
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            stackslots_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
            function_calls,
        }
    }

    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
        // Use r11 as a temp: clobbered anyway, and
        // not otherwise used as a return value in any of our
        // supported calling conventions.
        Writable::from_reg(regs::r11())
    }

    fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
        const PAYLOAD_REGS: &'static [Reg] = &[regs::rax(), regs::rdx()];
        match call_conv {
            isa::CallConv::SystemV | isa::CallConv::Tail | isa::CallConv::PreserveAll => {
                PAYLOAD_REGS
            }
            _ => &[],
        }
    }
}

impl From<StackAMode> for SyntheticAmode {
    fn from(amode: StackAMode) -> Self {
        // We enforce a 128 MB stack-frame size limit above, so these
        // `expect()`s should never fail.
        match amode {
            StackAMode::IncomingArg(off, stack_args_size) => {
                let offset = u32::try_from(off).expect(
                    "Offset in IncomingArg is greater than 4GB; should hit impl limit first",
                );
                SyntheticAmode::IncomingArg {
                    offset: stack_args_size - offset,
                }
            }
            StackAMode::Slot(off) => {
                let off = i32::try_from(off)
                    .expect("Offset in Slot is greater than 2GB; should hit impl limit first");
                SyntheticAmode::slot_offset(off)
            }
            StackAMode::OutgoingArg(off) => {
                let off = i32::try_from(off).expect(
                    "Offset in OutgoingArg is greater than 2GB; should hit impl limit first",
                );
                SyntheticAmode::Real(Amode::ImmReg {
                    simm32: off,
                    base: regs::rsp(),
                    flags: MemFlags::trusted(),
                })
            }
        }
    }
}

fn get_intreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
    let is_fastcall = call_conv == CallConv::WindowsFastcall;

    // Fastcall counts by absolute argument number; SysV counts by argument of
    // this (integer) class.
    let i = if is_fastcall { arg_idx } else { idx };
    match (i, is_fastcall) {
        (0, false) => Some(regs::rdi()),
        (1, false) => Some(regs::rsi()),
        (2, false) => Some(regs::rdx()),
        (3, false) => Some(regs::rcx()),
        (4, false) => Some(regs::r8()),
        (5, false) => Some(regs::r9()),
        (0, true) => Some(regs::rcx()),
        (1, true) => Some(regs::rdx()),
        (2, true) => Some(regs::r8()),
        (3, true) => Some(regs::r9()),
        _ => None,
    }
}
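// Example of the indexing difference above: for a signature `(f64, i64)`, SysV
// gives the i64 integer-class index 0 and thus %rdi, while fastcall uses the
// absolute parameter index 1 and thus %rdx.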

fn get_fltreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
    let is_fastcall = call_conv == CallConv::WindowsFastcall;

    // Fastcall counts by absolute argument number; SysV counts by argument of
    // this (floating-point) class.
    let i = if is_fastcall { arg_idx } else { idx };
    match (i, is_fastcall) {
        (0, false) => Some(regs::xmm0()),
        (1, false) => Some(regs::xmm1()),
        (2, false) => Some(regs::xmm2()),
        (3, false) => Some(regs::xmm3()),
        (4, false) => Some(regs::xmm4()),
        (5, false) => Some(regs::xmm5()),
        (6, false) => Some(regs::xmm6()),
        (7, false) => Some(regs::xmm7()),
        (0, true) => Some(regs::xmm0()),
        (1, true) => Some(regs::xmm1()),
        (2, true) => Some(regs::xmm2()),
        (3, true) => Some(regs::xmm3()),
        _ => None,
    }
}

fn get_intreg_for_retval(
    call_conv: CallConv,
    flags: &settings::Flags,
    intreg_idx: usize,
    is_last: bool,
) -> Option<Reg> {
    match call_conv {
        CallConv::Tail => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rcx()),
            2 => Some(regs::rdx()),
            3 => Some(regs::rsi()),
            4 => Some(regs::rdi()),
            5 => Some(regs::r8()),
            6 => Some(regs::r9()),
            7 => Some(regs::r10()),
            // NB: `r11` is reserved as a scratch register that is
            // also part of the clobber set.
            // NB: `r15` is reserved as a scratch register.
            _ => None,
        },
        CallConv::Fast | CallConv::SystemV | CallConv::PreserveAll => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()),
            2 if flags.enable_llvm_abi_extensions() => Some(regs::rcx()),
            _ => None,
        },
        CallConv::WindowsFastcall => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
            _ => None,
        },

        CallConv::Winch => is_last.then(|| regs::rax()),
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}

fn get_fltreg_for_retval(call_conv: CallConv, fltreg_idx: usize, is_last: bool) -> Option<Reg> {
    match call_conv {
        CallConv::Tail => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            2 => Some(regs::xmm2()),
            3 => Some(regs::xmm3()),
            4 => Some(regs::xmm4()),
            5 => Some(regs::xmm5()),
            6 => Some(regs::xmm6()),
            7 => Some(regs::xmm7()),
            _ => None,
        },
        CallConv::Fast | CallConv::SystemV | CallConv::PreserveAll => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            _ => None,
        },
        CallConv::WindowsFastcall => match fltreg_idx {
            0 => Some(regs::xmm0()),
            _ => None,
        },
        CallConv::Winch => is_last.then(|| regs::xmm0()),
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}

fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {
    use asm::gpr::enc::*;

    match r.class() {
        RegClass::Int => match r.hw_enc() {
            RBX | RBP | R12 | R13 | R14 => true,
            // R15 is the pinned register; if we're using it that way,
            // it is effectively globally-allocated, and is not
            // callee-saved.
            R15 => !enable_pinned_reg,
            _ => false,
        },
        RegClass::Float => false,
        RegClass::Vector => unreachable!(),
    }
}

fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    match r.class() {
        RegClass::Int => match r.hw_enc() {
            RBX | RBP | RSI | RDI | R12 | R13 | R14 => true,
            // See above for SysV: we must treat the pinned reg specially.
            R15 => !enable_pinned_reg,
            _ => false,
        },
        RegClass::Float => match r.hw_enc() {
            XMM6 | XMM7 | XMM8 | XMM9 | XMM10 | XMM11 | XMM12 | XMM13 | XMM14 | XMM15 => true,
            _ => false,
        },
        RegClass::Vector => unreachable!(),
    }
}

fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
    let mut clobbered_size = 0;
    for reg in clobbers {
        match reg.to_reg().class() {
            RegClass::Int => {
                clobbered_size += 8;
            }
            RegClass::Float => {
                clobbered_size = align_to(clobbered_size, 16);
                clobbered_size += 16;
            }
            RegClass::Vector => unreachable!(),
        }
    }
    align_to(clobbered_size, 16)
}
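// Worked example for the computation above: clobbers [rbx, r12, xmm8] take
// 8 + 8 = 16 bytes for the GPRs, the float save is then aligned to 16 and adds
// 16 more, and the final align_to keeps the total at 32 bytes.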

const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
pub(crate) const ALL_CLOBBERS: PRegSet = all_clobbers();
const NO_CLOBBERS: PRegSet = PRegSet::empty();

const fn windows_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
}

const fn sysv_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}

/// For calling conventions that clobber all registers.
const fn all_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RBX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::gpr_preg(R12))
        .with(regs::gpr_preg(R13))
        .with(regs::gpr_preg(R14))
        .with(regs::gpr_preg(R15))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}

fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv {
    fn preg(r: Reg) -> PReg {
        r.to_real_reg().unwrap().into()
    }

    let mut env = MachineEnv {
        preferred_regs_by_class: [
            // Preferred GPRs: caller-saved in the SysV ABI.
            vec![
                preg(regs::rsi()),
                preg(regs::rdi()),
                preg(regs::rax()),
                preg(regs::rcx()),
                preg(regs::rdx()),
                preg(regs::r8()),
                preg(regs::r9()),
                preg(regs::r10()),
                preg(regs::r11()),
            ],
            // Preferred XMMs: the first 8, which can have smaller encodings
            // with AVX instructions.
            vec![
                preg(regs::xmm0()),
                preg(regs::xmm1()),
                preg(regs::xmm2()),
                preg(regs::xmm3()),
                preg(regs::xmm4()),
                preg(regs::xmm5()),
                preg(regs::xmm6()),
                preg(regs::xmm7()),
            ],
            // The Vector Regclass is unused
            vec![],
        ],
        non_preferred_regs_by_class: [
            // Non-preferred GPRs: callee-saved in the SysV ABI.
            vec![
                preg(regs::rbx()),
                preg(regs::r12()),
                preg(regs::r13()),
                preg(regs::r14()),
            ],
            // Non-preferred XMMs: the last 8 registers, which can have larger
            // encodings with AVX instructions.
            vec![
                preg(regs::xmm8()),
                preg(regs::xmm9()),
                preg(regs::xmm10()),
                preg(regs::xmm11()),
                preg(regs::xmm12()),
                preg(regs::xmm13()),
                preg(regs::xmm14()),
                preg(regs::xmm15()),
            ],
            // The Vector Regclass is unused
            vec![],
        ],
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    };

    debug_assert_eq!(regs::r15(), regs::pinned_reg());
    if !enable_pinned_reg {
        env.non_preferred_regs_by_class[0].push(preg(regs::r15()));
    }

    env
}