GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/isa/x64/abi.rs
//! Implementation of the standard x64 ABI.
2
3
use crate::CodegenResult;
4
use crate::ir::{self, LibCall, MemFlags, Signature, TrapCode, types};
5
use crate::ir::{ExternalName, types::*};
6
use crate::isa;
7
use crate::isa::winch;
8
use crate::isa::{CallConv, unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings};
9
use crate::machinst::abi::*;
10
use crate::machinst::*;
11
use crate::settings;
12
use alloc::boxed::Box;
13
use alloc::vec::Vec;
14
use args::*;
15
use cranelift_assembler_x64 as asm;
16
use regalloc2::{MachineEnv, PReg, PRegSet};
17
use smallvec::{SmallVec, smallvec};
18
use std::borrow::ToOwned;
19
use std::sync::OnceLock;
20
21
/// Support for the x64 ABI from the callee side (within a function body).
22
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;
23
24
/// Implementation of ABI primitives for x64.
25
pub struct X64ABIMachineSpec;
26
27
impl X64ABIMachineSpec {
28
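/// Emit `probe_count` unrolled stack probes: each probe decrements RSP by
/// `guard_size` and then writes to the newly exposed stack slot, so every
/// guard region is touched in order; RSP is restored to its original value
/// at the end.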
fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
29
insts.reserve(probe_count as usize);
30
for _ in 0..probe_count {
31
// "Allocate" stack space for the probe by decrementing the stack pointer before
32
// the write. This is required to make valgrind happy.
33
// See: https://github.com/bytecodealliance/wasmtime/issues/7454
34
insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));
35
36
// TODO: It would be nice if we could store an immediate 0, but we don't have an
// instruction for that, so store the stack pointer instead. Any register will do,
// since the stack contents are undefined at this point.
38
insts.push(Inst::store(
39
I32,
40
regs::rsp(),
41
Amode::imm_reg(0, regs::rsp()),
42
));
43
}
44
45
// Restore the stack pointer to its original value
46
insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
47
}
48
49
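/// Emit a single `StackProbeLoop` pseudo-instruction that touches the stack
/// one guard-size region at a time, using `r11` as the temporary register.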
fn gen_probestack_loop(
50
insts: &mut SmallInstVec<Inst>,
51
_call_conv: isa::CallConv,
52
frame_size: u32,
53
guard_size: u32,
54
) {
55
// We have to use a caller-saved register since clobbering only
56
// happens after stack probing.
57
// `r11` is caller saved on both Fastcall and SystemV, and not used
58
// for argument passing, so it's pretty much free. It is also not
59
// used by the stacklimit mechanism.
60
let tmp = regs::r11();
61
debug_assert!({
62
let real_reg = tmp.to_real_reg().unwrap();
63
!is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
64
});
65
66
insts.push(Inst::StackProbeLoop {
67
tmp: Writable::from_reg(tmp),
68
frame_size,
69
guard_size,
70
});
71
}
72
}
73
74
impl IsaFlags for x64_settings::Flags {}
75
76
impl ABIMachineSpec for X64ABIMachineSpec {
77
type I = Inst;
78
79
type F = x64_settings::Flags;
80
81
/// This is the limit for the size of argument and return-value areas on the
82
/// stack. We place a reasonable limit here to avoid integer overflow issues
83
/// with 32-bit arithmetic: for now, 128 MB.
84
const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
85
86
fn word_bits() -> u32 {
87
64
88
}
89
90
/// Return required stack alignment in bytes.
91
fn stack_align(_call_conv: isa::CallConv) -> u32 {
92
16
93
}
94
95
fn compute_arg_locs(
96
call_conv: isa::CallConv,
97
flags: &settings::Flags,
98
params: &[ir::AbiParam],
99
args_or_rets: ArgsOrRets,
100
add_ret_area_ptr: bool,
101
mut args: ArgsAccumulator,
102
) -> CodegenResult<(u32, Option<usize>)> {
103
let is_fastcall = call_conv == CallConv::WindowsFastcall;
104
let is_tail = call_conv == CallConv::Tail;
105
106
let mut next_gpr = 0;
107
let mut next_vreg = 0;
108
let mut next_stack: u32 = 0;
109
let mut next_param_idx = 0; // Fastcall cares about overall param index
110
111
if args_or_rets == ArgsOrRets::Args && is_fastcall {
112
// Fastcall always reserves 32 bytes of shadow space corresponding to
113
// the four initial in-arg parameters.
114
//
115
// (See:
116
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
117
next_stack = 32;
118
}
119
120
let ret_area_ptr = if add_ret_area_ptr {
121
debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
122
next_gpr += 1;
123
next_param_idx += 1;
124
// In the SystemV and WindowsFastcall ABIs, the return area pointer is the first
// argument. For the Tail and Winch ABIs we do the same for simplicity's sake.
126
Some(ABIArg::reg(
127
get_intreg_for_arg(call_conv, 0, 0)
128
.unwrap()
129
.to_real_reg()
130
.unwrap(),
131
types::I64,
132
ir::ArgumentExtension::None,
133
ir::ArgumentPurpose::Normal,
134
))
135
} else {
136
None
137
};
138
139
// If any param uses extension, the winch calling convention will not pack its results
140
// on the stack and will instead align them to 8-byte boundaries the same way that all the
141
// other calling conventions do. This isn't consistent with Winch itself, but is fine as
142
// Winch only uses this calling convention via trampolines, and those trampolines don't add
143
// extension annotations. Additionally, handling extension attributes this way allows clif
144
// functions that use them with the Winch calling convention to interact successfully with
145
// testing infrastructure.
146
// The results are also not packed if any of the types are `f16`. This is to simplify the
147
// implementation of `Inst::load`/`Inst::store` (which would otherwise require multiple
148
// instructions), and doesn't affect Winch itself as Winch doesn't support `f16` at all.
149
let uses_extension = params.iter().any(|p| {
150
p.extension != ir::ArgumentExtension::None
151
|| p.value_type == types::F16
152
|| p.value_type == types::I8X2
153
});
154
155
for (ix, param) in params.iter().enumerate() {
156
let last_param = ix == params.len() - 1;
157
158
if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
159
let offset = next_stack as i64;
160
let size = size;
161
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
162
next_stack += size;
163
args.push(ABIArg::StructArg {
164
offset,
165
size: size as u64,
166
purpose: param.purpose,
167
});
168
continue;
169
}
170
171
// Find regclass(es) of the register(s) used to store a value of this type.
172
let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
173
174
// Now assign ABIArgSlots for each register-sized part.
175
//
176
// Note that the handling of `i128` values is unique here:
177
//
178
// - If `enable_llvm_abi_extensions` is set in the flags, each
179
// `i128` is split into two `i64`s and assigned exactly as if it
180
// were two consecutive 64-bit args, except that if one of the
181
// two halves is forced onto the stack, the other half is too.
182
// This is consistent with LLVM's behavior, and is needed for
183
// some uses of Cranelift (e.g., the rustc backend).
184
//
185
// - Otherwise, if the calling convention is Tail, we behave as in
186
// the previous case, even if `enable_llvm_abi_extensions` is not
187
// set in the flags: this is a custom calling convention defined
// by Cranelift; LLVM doesn't know about it.
189
//
190
// - Otherwise, both SysV and Fastcall specify behavior (use of
191
// vector register, a register pair, or passing by reference
192
// depending on the case), but for simplicity, we will just panic if
193
// an i128 type appears in a signature and the LLVM extensions flag
194
// is not set.
195
//
196
// For examples of how rustc compiles i128 args and return values on
197
// both SysV and Fastcall platforms, see:
198
// https://godbolt.org/z/PhG3ob
199
200
if param.value_type.bits() > 64
201
&& !(param.value_type.is_vector() || param.value_type.is_float())
202
&& !flags.enable_llvm_abi_extensions()
203
&& !is_tail
204
{
205
panic!(
206
"i128 args/return values not supported unless LLVM ABI extensions are enabled"
207
);
208
}
209
// As MSVC doesn't support f16/f128, there is no standard way to pass/return them with
// the Windows ABI. LLVM passes/returns them in XMM registers.
211
if matches!(param.value_type, types::F16 | types::F128)
212
&& is_fastcall
213
&& !flags.enable_llvm_abi_extensions()
214
{
215
panic!(
216
"f16/f128 args/return values not supported for windows_fastcall unless LLVM ABI extensions are enabled"
217
);
218
}
219
220
// Windows fastcall dictates that `__m128i` and `f128` parameters to
221
// a function are passed indirectly as pointers, so handle that as a
222
// special case before the loop below.
223
if (param.value_type.is_vector() || param.value_type.is_float())
224
&& param.value_type.bits() >= 128
225
&& args_or_rets == ArgsOrRets::Args
226
&& is_fastcall
227
{
228
let pointer = match get_intreg_for_arg(call_conv, next_gpr, next_param_idx) {
229
Some(reg) => {
230
next_gpr += 1;
231
ABIArgSlot::Reg {
232
reg: reg.to_real_reg().unwrap(),
233
ty: ir::types::I64,
234
extension: ir::ArgumentExtension::None,
235
}
236
}
237
238
None => {
239
next_stack = align_to(next_stack, 8) + 8;
240
ABIArgSlot::Stack {
241
offset: (next_stack - 8) as i64,
242
ty: ir::types::I64,
243
extension: param.extension,
244
}
245
}
246
};
247
next_param_idx += 1;
248
args.push(ABIArg::ImplicitPtrArg {
249
// NB: this is filled in after this loop
250
offset: 0,
251
pointer,
252
ty: param.value_type,
253
purpose: param.purpose,
254
});
255
continue;
256
}
257
258
// SystemV dictates that 128-bit integer parameters are always either
// passed in two registers or on the stack, so handle that as a
// special case before the loop below.
261
if param.value_type == types::I128
262
&& args_or_rets == ArgsOrRets::Args
263
&& call_conv == CallConv::SystemV
264
{
265
let mut slots = ABIArgSlotVec::new();
266
match (
267
get_intreg_for_arg(CallConv::SystemV, next_gpr, next_param_idx),
268
get_intreg_for_arg(CallConv::SystemV, next_gpr + 1, next_param_idx + 1),
269
) {
270
(Some(reg1), Some(reg2)) => {
271
slots.push(ABIArgSlot::Reg {
272
reg: reg1.to_real_reg().unwrap(),
273
ty: ir::types::I64,
274
extension: ir::ArgumentExtension::None,
275
});
276
slots.push(ABIArgSlot::Reg {
277
reg: reg2.to_real_reg().unwrap(),
278
ty: ir::types::I64,
279
extension: ir::ArgumentExtension::None,
280
});
281
}
282
_ => {
283
let size = 16;
284
285
// Align.
286
next_stack = align_to(next_stack, size);
287
288
slots.push(ABIArgSlot::Stack {
289
offset: next_stack as i64,
290
ty: ir::types::I64,
291
extension: param.extension,
292
});
293
slots.push(ABIArgSlot::Stack {
294
offset: next_stack as i64 + 8,
295
ty: ir::types::I64,
296
extension: param.extension,
297
});
298
next_stack += size;
299
}
300
};
301
// Unconditionally increment next_gpr even when storing the
302
// argument on the stack to prevent reusing a possibly
303
// remaining register for the next argument.
304
next_gpr += 2;
305
next_param_idx += 2;
306
307
args.push(ABIArg::Slots {
308
slots,
309
purpose: param.purpose,
310
});
311
continue;
312
}
313
314
let mut slots = ABIArgSlotVec::new();
315
for (ix, (rc, reg_ty)) in rcs.iter().zip(reg_tys.iter()).enumerate() {
316
let last_slot = last_param && ix == rcs.len() - 1;
317
318
let intreg = *rc == RegClass::Int;
319
let nextreg = if intreg {
320
match args_or_rets {
321
ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr, next_param_idx),
322
ArgsOrRets::Rets => {
323
get_intreg_for_retval(call_conv, flags, next_gpr, last_slot)
324
}
325
}
326
} else {
327
match args_or_rets {
328
ArgsOrRets::Args => {
329
get_fltreg_for_arg(call_conv, next_vreg, next_param_idx)
330
}
331
ArgsOrRets::Rets => get_fltreg_for_retval(call_conv, next_vreg, last_slot),
332
}
333
};
334
next_param_idx += 1;
335
if let Some(reg) = nextreg {
336
if intreg {
337
next_gpr += 1;
338
} else {
339
next_vreg += 1;
340
}
341
slots.push(ABIArgSlot::Reg {
342
reg: reg.to_real_reg().unwrap(),
343
ty: *reg_ty,
344
extension: param.extension,
345
});
346
} else {
347
if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
348
return Err(crate::CodegenError::Unsupported(
349
"Too many return values to fit in registers. \
350
Use a StructReturn argument instead. (#9510)"
351
.to_owned(),
352
));
353
}
354
355
let size = reg_ty.bytes();
356
let size = if call_conv == CallConv::Winch
357
&& args_or_rets == ArgsOrRets::Rets
358
&& !uses_extension
359
{
360
size
361
} else {
362
let size = std::cmp::max(size, 8);
363
364
// Align.
365
debug_assert!(size.is_power_of_two());
366
next_stack = align_to(next_stack, size);
367
size
368
};
369
370
slots.push(ABIArgSlot::Stack {
371
offset: next_stack as i64,
372
ty: *reg_ty,
373
extension: param.extension,
374
});
375
next_stack += size;
376
}
377
}
378
379
args.push(ABIArg::Slots {
380
slots,
381
purpose: param.purpose,
382
});
383
}
384
385
// Fastcall's indirectly-passed vector arguments of 128 bits or more all live on
// the stack, and their stack space is reserved after all other parameters have
// been assigned, so allocate from that space now.
388
if args_or_rets == ArgsOrRets::Args && is_fastcall {
389
for arg in args.args_mut() {
390
if let ABIArg::ImplicitPtrArg { offset, .. } = arg {
391
assert_eq!(*offset, 0);
392
next_stack = align_to(next_stack, 16);
393
*offset = next_stack as i64;
394
next_stack += 16;
395
}
396
}
397
}
398
let extra_arg_idx = if let Some(ret_area_ptr) = ret_area_ptr {
399
args.push_non_formal(ret_area_ptr);
400
Some(args.args().len() - 1)
401
} else {
402
None
403
};
404
405
// Winch writes the first result to the highest offset, so we need to iterate through the
406
// args and adjust the offsets down.
407
if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
408
winch::reverse_stack(args, next_stack, uses_extension);
409
}
410
411
next_stack = align_to(next_stack, 16);
412
413
Ok((next_stack, extra_arg_idx))
414
}
415
416
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
417
// For integer-typed values, we always load a full 64 bits (and we always spill a full 64
418
// bits as well -- see `Inst::store()`).
419
let ty = match ty {
420
types::I8 | types::I16 | types::I32 => types::I64,
421
// Stack slots are always at least 8 bytes, so it's fine to load 4 bytes instead of only
422
// two.
423
types::F16 | types::I8X2 => types::F32,
424
_ => ty,
425
};
426
Inst::load(ty, mem, into_reg, ExtKind::None)
427
}
428
429
fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
430
let ty = match ty {
431
// See `gen_load_stack`.
432
types::F16 | types::I8X2 => types::F32,
433
_ => ty,
434
};
435
Inst::store(ty, from_reg, mem)
436
}
437
438
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
439
Inst::gen_move(to_reg, from_reg, ty)
440
}
441
442
/// Generate an integer-extend operation.
443
fn gen_extend(
444
to_reg: Writable<Reg>,
445
from_reg: Reg,
446
is_signed: bool,
447
from_bits: u8,
448
to_bits: u8,
449
) -> Self::I {
450
let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
451
.unwrap_or_else(|| panic!("invalid extension: {from_bits} -> {to_bits}"));
452
if is_signed {
453
Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
454
} else {
455
Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
456
}
457
}
458
459
fn gen_args(args: Vec<ArgPair>) -> Inst {
460
Inst::Args { args }
461
}
462
463
fn gen_rets(rets: Vec<RetPair>) -> Inst {
464
Inst::Rets { rets }
465
}
466
467
fn gen_add_imm(
468
_call_conv: isa::CallConv,
469
into_reg: Writable<Reg>,
470
from_reg: Reg,
471
imm: u32,
472
) -> SmallInstVec<Self::I> {
473
let mut ret = SmallVec::new();
474
if from_reg != into_reg.to_reg() {
475
ret.push(Inst::gen_move(into_reg, from_reg, I64));
476
}
477
let imm = i32::try_from(imm).expect("`imm` is too large to fit in a 32-bit immediate");
478
ret.push(Inst::addq_mi(into_reg, imm));
479
ret
480
}
481
482
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
483
smallvec![
484
Inst::External {
485
inst: asm::inst::cmpq_rm::new(Gpr::unwrap_new(limit_reg), Gpr::RSP,).into(),
486
},
487
Inst::TrapIf {
488
// NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
489
cc: CC::NBE,
490
trap_code: TrapCode::STACK_OVERFLOW,
491
},
492
]
493
}
494
495
fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I {
496
let mem: SyntheticAmode = mem.into();
497
Inst::External {
498
inst: asm::inst::leaq_rm::new(into_reg, mem).into(),
499
}
500
}
501
502
fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
503
// As per comment on trait definition, we must return a caller-save
504
// register that is not used as an argument here.
505
debug_assert!(!is_callee_save_systemv(
506
regs::r10().to_real_reg().unwrap(),
507
false
508
));
509
regs::r10()
510
}
511
512
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
513
// Only ever used for I64s, F128s and vectors; if that changes, see if
514
// the ExtKind below needs to be changed.
515
assert!(ty == I64 || ty.is_vector() || ty == F128);
516
let mem = Amode::imm_reg(offset, base);
517
Inst::load(ty, mem, into_reg, ExtKind::None)
518
}
519
520
fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
521
let ty = match ty {
522
// See `gen_load_stack`.
523
types::F16 | types::I8X2 => types::F32,
524
_ => ty,
525
};
526
let mem = Amode::imm_reg(offset, base);
527
Inst::store(ty, from_reg, mem)
528
}
529
530
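/// Adjust RSP by `amount` bytes: a positive amount adds to RSP (releasing
/// stack space) and a negative amount subtracts from it (allocating space).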
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
531
let rsp = Writable::from_reg(regs::rsp());
532
let inst = if amount >= 0 {
533
Inst::addq_mi(rsp, amount)
534
} else {
535
Inst::subq_mi(rsp, -amount)
536
};
537
smallvec![inst]
538
}
539
540
fn gen_prologue_frame_setup(
541
_call_conv: isa::CallConv,
542
flags: &settings::Flags,
543
_isa_flags: &x64_settings::Flags,
544
frame_layout: &FrameLayout,
545
) -> SmallInstVec<Self::I> {
546
let r_rsp = Gpr::RSP;
547
let r_rbp = Gpr::RBP;
548
let w_rbp = Writable::from_reg(r_rbp);
549
let mut insts = SmallVec::new();
550
// `push %rbp`
551
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
552
insts.push(Inst::External {
553
inst: asm::inst::pushq_o::new(r_rbp).into(),
554
});
555
556
if flags.unwind_info() {
557
insts.push(Inst::Unwind {
558
inst: UnwindInst::PushFrameRegs {
559
offset_upward_to_caller_sp: frame_layout.setup_area_size,
560
},
561
});
562
}
563
564
// `mov %rsp, %rbp`
565
// RSP is now 0 % 16
566
insts.push(Inst::External {
567
inst: asm::inst::movq_mr::new(w_rbp, r_rsp).into(),
568
});
569
570
insts
571
}
572
573
fn gen_epilogue_frame_restore(
574
_call_conv: isa::CallConv,
575
_flags: &settings::Flags,
576
_isa_flags: &x64_settings::Flags,
577
_frame_layout: &FrameLayout,
578
) -> SmallInstVec<Self::I> {
579
let rbp = Gpr::RBP;
580
let rsp = Gpr::RSP;
581
582
let mut insts = SmallVec::new();
583
// `mov %rbp, %rsp`
584
insts.push(Inst::External {
585
inst: asm::inst::movq_mr::new(Writable::from_reg(rsp), rbp).into(),
586
});
587
// `pop %rbp`
588
insts.push(Inst::External {
589
inst: asm::inst::popq_o::new(Writable::from_reg(rbp)).into(),
590
});
591
insts
592
}
593
594
fn gen_return(
595
call_conv: CallConv,
596
_isa_flags: &x64_settings::Flags,
597
frame_layout: &FrameLayout,
598
) -> SmallInstVec<Self::I> {
599
// Emit return instruction.
600
let stack_bytes_to_pop = if call_conv == CallConv::Tail {
601
frame_layout.tail_args_size
602
} else {
603
0
604
};
605
let inst = if stack_bytes_to_pop == 0 {
606
asm::inst::retq_zo::new().into()
607
} else {
608
let stack_bytes_to_pop = u16::try_from(stack_bytes_to_pop).unwrap();
609
asm::inst::retq_i::new(stack_bytes_to_pop).into()
610
};
611
smallvec![Inst::External { inst }]
612
}
613
614
fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
615
insts.push(Inst::imm(
616
OperandSize::Size32,
617
frame_size as u64,
618
Writable::from_reg(regs::rax()),
619
));
620
insts.push(Inst::CallKnown {
621
// No need to include arg here: we are post-regalloc
622
// so no constraints will be seen anyway.
623
info: Box::new(CallInfo::empty(
624
ExternalName::LibCall(LibCall::Probestack),
625
CallConv::Probestack,
626
)),
627
});
628
}
629
630
fn gen_inline_probestack(
631
insts: &mut SmallInstVec<Self::I>,
632
call_conv: isa::CallConv,
633
frame_size: u32,
634
guard_size: u32,
635
) {
636
// Unroll at most n consecutive probes before falling back to a loop.
//
// This number was picked because the loop version is 38 bytes long. We can fit
// 4 inline probes in that space, so unroll if it's beneficial in terms of code size.
640
const PROBE_MAX_UNROLL: u32 = 4;
641
642
// Calculate how many probes we need to perform. Round down, as we only
643
// need to probe whole guard_size regions we'd otherwise skip over.
644
let probe_count = frame_size / guard_size;
645
if probe_count == 0 {
646
// No probe necessary
647
} else if probe_count <= PROBE_MAX_UNROLL {
648
Self::gen_probestack_unroll(insts, guard_size, probe_count)
649
} else {
650
Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
651
}
652
}
653
654
fn gen_clobber_save(
655
_call_conv: isa::CallConv,
656
flags: &settings::Flags,
657
frame_layout: &FrameLayout,
658
) -> SmallVec<[Self::I; 16]> {
659
let mut insts = SmallVec::new();
660
661
// When a return_call within this function required more stack arguments than we have
662
// present, resize the incoming argument area of the frame to accommodate those arguments.
663
let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
664
if incoming_args_diff > 0 {
665
// Decrement the stack pointer to make space for the new arguments.
666
let rsp = Writable::from_reg(regs::rsp());
667
insts.push(Inst::subq_mi(
668
rsp,
669
i32::try_from(incoming_args_diff)
670
.expect("`incoming_args_diff` is too large to fit in a 32-bit immediate"),
671
));
672
673
// Make sure to keep the frame pointer and stack pointer in sync at
674
// this point.
675
let rbp = Gpr::RBP;
676
let rsp = Gpr::RSP;
677
insts.push(Inst::External {
678
inst: asm::inst::movq_mr::new(Writable::from_reg(rbp), rsp).into(),
679
});
680
681
let incoming_args_diff = i32::try_from(incoming_args_diff).unwrap();
682
683
// Move the saved frame pointer down by `incoming_args_diff`.
684
let addr = Amode::imm_reg(incoming_args_diff, regs::rsp());
685
let r11 = Writable::from_reg(Gpr::R11);
686
let inst = asm::inst::movq_rm::new(r11, addr).into();
687
insts.push(Inst::External { inst });
688
let inst = asm::inst::movq_mr::new(Amode::imm_reg(0, regs::rsp()), r11.to_reg()).into();
689
insts.push(Inst::External { inst });
690
691
// Move the saved return address down by `incoming_args_diff`.
692
let addr = Amode::imm_reg(incoming_args_diff + 8, regs::rsp());
693
let inst = asm::inst::movq_rm::new(r11, addr).into();
694
insts.push(Inst::External { inst });
695
let inst = asm::inst::movq_mr::new(Amode::imm_reg(8, regs::rsp()), r11.to_reg()).into();
696
insts.push(Inst::External { inst });
697
}
698
699
// We need to factor `incoming_args_diff` into the offset upward here, as we have grown
700
// the argument area -- `setup_area_size` alone will not be the correct offset up to the
701
// original caller's SP.
702
let offset_upward_to_caller_sp = frame_layout.setup_area_size + incoming_args_diff;
703
if flags.unwind_info() && offset_upward_to_caller_sp > 0 {
704
// Emit unwind info: start the frame. The frame (from unwind
705
// consumers' point of view) starts at clobbers, just below
706
// the FP and return address. Spill slots and stack slots are
707
// part of our actual frame but do not concern the unwinder.
708
insts.push(Inst::Unwind {
709
inst: UnwindInst::DefineNewFrame {
710
offset_downward_to_clobbers: frame_layout.clobber_size,
711
offset_upward_to_caller_sp,
712
},
713
});
714
}
715
716
// Adjust the stack pointer downward for clobbers and the function fixed
717
// frame (spillslots, storage slots, and argument area).
718
let stack_size = frame_layout.fixed_frame_storage_size
719
+ frame_layout.clobber_size
720
+ frame_layout.outgoing_args_size;
721
if stack_size > 0 {
722
let rsp = Writable::from_reg(regs::rsp());
723
let stack_size = i32::try_from(stack_size)
724
.expect("`stack_size` is too large to fit in a 32-bit immediate");
725
insts.push(Inst::subq_mi(rsp, stack_size));
726
}
727
728
// Store each clobbered register in order at offsets from RSP,
729
// placing them above the fixed frame slots.
730
let clobber_offset =
731
frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
732
let mut cur_offset = 0;
733
for reg in &frame_layout.clobbered_callee_saves {
734
let r_reg = reg.to_reg();
735
let ty = match r_reg.class() {
736
RegClass::Int => types::I64,
737
RegClass::Float => types::I8X16,
738
RegClass::Vector => unreachable!(),
739
};
740
741
// Align to 8 or 16 bytes as required by the storage type of the clobber.
742
cur_offset = align_to(cur_offset, ty.bytes());
743
let off = cur_offset;
744
cur_offset += ty.bytes();
745
746
insts.push(Inst::store(
747
ty,
748
r_reg.into(),
749
Amode::imm_reg(i32::try_from(off + clobber_offset).unwrap(), regs::rsp()),
750
));
751
752
if flags.unwind_info() {
753
insts.push(Inst::Unwind {
754
inst: UnwindInst::SaveReg {
755
clobber_offset: off,
756
reg: r_reg,
757
},
758
});
759
}
760
}
761
762
insts
763
}
764
765
fn gen_clobber_restore(
766
_call_conv: isa::CallConv,
767
_flags: &settings::Flags,
768
frame_layout: &FrameLayout,
769
) -> SmallVec<[Self::I; 16]> {
770
let mut insts = SmallVec::new();
771
772
// Restore regs by loading from offsets of RSP. We compute the offset from
773
// the same base as above in clobber_save, as RSP won't change between the
774
// prologue and epilogue.
775
let mut cur_offset =
776
frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
777
for reg in &frame_layout.clobbered_callee_saves {
778
let rreg = reg.to_reg();
779
let ty = match rreg.class() {
780
RegClass::Int => types::I64,
781
RegClass::Float => types::I8X16,
782
RegClass::Vector => unreachable!(),
783
};
784
785
// Align to 8 or 16 bytes as required by the storage type of the clobber.
786
cur_offset = align_to(cur_offset, ty.bytes());
787
788
insts.push(Inst::load(
789
ty,
790
Amode::imm_reg(cur_offset.try_into().unwrap(), regs::rsp()),
791
Writable::from_reg(rreg.into()),
792
ExtKind::None,
793
));
794
795
cur_offset += ty.bytes();
796
}
797
798
let stack_size = frame_layout.fixed_frame_storage_size
799
+ frame_layout.clobber_size
800
+ frame_layout.outgoing_args_size;
801
802
// Adjust RSP back upward.
803
if stack_size > 0 {
804
let rsp = Writable::from_reg(regs::rsp());
805
let stack_size = i32::try_from(stack_size)
806
.expect("`stack_size` is too large to fit in a 32-bit immediate");
807
insts.push(Inst::addq_mi(rsp, stack_size));
808
}
809
810
insts
811
}
812
813
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
814
call_conv: isa::CallConv,
815
dst: Reg,
816
src: Reg,
817
size: usize,
818
mut alloc_tmp: F,
819
) -> SmallVec<[Self::I; 8]> {
820
let mut insts = SmallVec::new();
821
let arg0 = get_intreg_for_arg(call_conv, 0, 0).unwrap();
822
let arg1 = get_intreg_for_arg(call_conv, 1, 1).unwrap();
823
let arg2 = get_intreg_for_arg(call_conv, 2, 2).unwrap();
824
let temp = alloc_tmp(Self::word_type());
825
let temp2 = alloc_tmp(Self::word_type());
826
insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
827
// We use an indirect call and a full LoadExtName because we do not have
828
// information about the libcall `RelocDistance` here, so we
829
// conservatively use the more flexible calling sequence.
830
insts.push(Inst::LoadExtName {
831
dst: temp2.map(Gpr::unwrap_new),
832
name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
833
offset: 0,
834
distance: RelocDistance::Far,
835
});
836
let callee_pop_size = 0;
837
insts.push(Inst::call_unknown(Box::new(CallInfo {
838
dest: RegMem::reg(temp2.to_reg()),
839
uses: smallvec![
840
CallArgPair {
841
vreg: dst,
842
preg: arg0
843
},
844
CallArgPair {
845
vreg: src,
846
preg: arg1
847
},
848
CallArgPair {
849
vreg: temp.to_reg(),
850
preg: arg2
851
},
852
],
853
defs: smallvec![],
854
clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
855
callee_pop_size,
856
callee_conv: call_conv,
857
caller_conv: call_conv,
858
try_call_info: None,
859
})));
860
insts
861
}
862
863
fn get_number_of_spillslots_for_value(
864
rc: RegClass,
865
vector_scale: u32,
866
_isa_flags: &Self::F,
867
) -> u32 {
868
// We allocate in terms of 8-byte slots.
869
match rc {
870
RegClass::Int => 1,
871
RegClass::Float => vector_scale / 8,
872
RegClass::Vector => unreachable!(),
873
}
874
}
875
876
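/// Return the lazily-initialized register-allocation environment; separate
/// cached copies are kept for the pinned-register and non-pinned-register
/// configurations.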
fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
877
if flags.enable_pinned_reg() {
878
static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
879
MACHINE_ENV.get_or_init(|| create_reg_env_systemv(true))
880
} else {
881
static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
882
MACHINE_ENV.get_or_init(|| create_reg_env_systemv(false))
883
}
884
}
885
886
fn get_regs_clobbered_by_call(
887
call_conv_of_callee: isa::CallConv,
888
is_exception: bool,
889
) -> PRegSet {
890
match call_conv_of_callee {
891
CallConv::Winch => ALL_CLOBBERS,
892
CallConv::WindowsFastcall => WINDOWS_CLOBBERS,
893
CallConv::Tail if is_exception => ALL_CLOBBERS,
894
_ => SYSV_CLOBBERS,
895
}
896
}
897
898
fn get_ext_mode(
899
_call_conv: isa::CallConv,
900
specified: ir::ArgumentExtension,
901
) -> ir::ArgumentExtension {
902
specified
903
}
904
905
fn compute_frame_layout(
906
call_conv: CallConv,
907
flags: &settings::Flags,
908
_sig: &Signature,
909
regs: &[Writable<RealReg>],
910
function_calls: FunctionCalls,
911
incoming_args_size: u32,
912
tail_args_size: u32,
913
stackslots_size: u32,
914
fixed_frame_storage_size: u32,
915
outgoing_args_size: u32,
916
) -> FrameLayout {
917
debug_assert!(tail_args_size >= incoming_args_size);
918
919
let mut regs: Vec<Writable<RealReg>> = match call_conv {
920
// The `winch` calling convention doesn't have any callee-save
921
// registers.
922
CallConv::Winch => vec![],
923
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::Tail => regs
924
.iter()
925
.cloned()
926
.filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
927
.collect(),
928
CallConv::WindowsFastcall => regs
929
.iter()
930
.cloned()
931
.filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
932
.collect(),
933
CallConv::Probestack => todo!("probestack?"),
934
CallConv::AppleAarch64 => unreachable!(),
935
};
936
// Sort registers for deterministic code output. We can do an unstable sort because the
937
// registers will be unique (there are no dups).
938
regs.sort_unstable();
939
940
// Compute clobber size.
941
let clobber_size = compute_clobber_size(&regs);
942
943
// Compute setup area size.
944
let setup_area_size = 16; // RBP, return address
945
946
// Return FrameLayout structure.
947
FrameLayout {
948
word_bytes: 8,
949
incoming_args_size,
950
tail_args_size: align_to(tail_args_size, 16),
951
setup_area_size,
952
clobber_size,
953
fixed_frame_storage_size,
954
stackslots_size,
955
outgoing_args_size,
956
clobbered_callee_saves: regs,
957
function_calls,
958
}
959
}
960
961
fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
962
// Use r11 as a temp: clobbered anyway, and
963
// not otherwise used as a return value in any of our
964
// supported calling conventions.
965
Writable::from_reg(regs::r11())
966
}
967
968
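/// Registers used for exception payload values: RAX and RDX under the SystemV
/// and Tail conventions; other conventions define none.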
fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
969
const PAYLOAD_REGS: &'static [Reg] = &[regs::rax(), regs::rdx()];
970
match call_conv {
971
isa::CallConv::SystemV | isa::CallConv::Tail => PAYLOAD_REGS,
972
_ => &[],
973
}
974
}
975
}
976
977
impl From<StackAMode> for SyntheticAmode {
978
fn from(amode: StackAMode) -> Self {
979
// We enforce a 128 MB stack-frame size limit above, so these
980
// `expect()`s should never fail.
981
match amode {
982
StackAMode::IncomingArg(off, stack_args_size) => {
983
let offset = u32::try_from(off).expect(
984
"Offset in IncomingArg is greater than 4GB; should hit impl limit first",
985
);
986
SyntheticAmode::IncomingArg {
987
offset: stack_args_size - offset,
988
}
989
}
990
StackAMode::Slot(off) => {
991
let off = i32::try_from(off)
992
.expect("Offset in Slot is greater than 2GB; should hit impl limit first");
993
SyntheticAmode::slot_offset(off)
994
}
995
StackAMode::OutgoingArg(off) => {
996
let off = i32::try_from(off).expect(
997
"Offset in OutgoingArg is greater than 2GB; should hit impl limit first",
998
);
999
SyntheticAmode::Real(Amode::ImmReg {
1000
simm32: off,
1001
base: regs::rsp(),
1002
flags: MemFlags::trusted(),
1003
})
1004
}
1005
}
1006
}
1007
}
1008
1009
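/// Return the next integer argument register, or `None` once integer arguments
/// must go on the stack. SysV indexes by integer-class position (`idx`), while
/// Fastcall indexes by overall parameter position (`arg_idx`).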
fn get_intreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
1010
let is_fastcall = call_conv == CallConv::WindowsFastcall;
1011
1012
// Fastcall counts by absolute argument number; SysV counts by argument of
1013
// this (integer) class.
1014
let i = if is_fastcall { arg_idx } else { idx };
1015
match (i, is_fastcall) {
1016
(0, false) => Some(regs::rdi()),
1017
(1, false) => Some(regs::rsi()),
1018
(2, false) => Some(regs::rdx()),
1019
(3, false) => Some(regs::rcx()),
1020
(4, false) => Some(regs::r8()),
1021
(5, false) => Some(regs::r9()),
1022
(0, true) => Some(regs::rcx()),
1023
(1, true) => Some(regs::rdx()),
1024
(2, true) => Some(regs::r8()),
1025
(3, true) => Some(regs::r9()),
1026
_ => None,
1027
}
1028
}
1029
1030
fn get_fltreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
1031
let is_fastcall = call_conv == CallConv::WindowsFastcall;
1032
1033
// Fastcall counts by absolute argument number; SysV counts by argument of
1034
// this (floating-point) class.
1035
let i = if is_fastcall { arg_idx } else { idx };
1036
match (i, is_fastcall) {
1037
(0, false) => Some(regs::xmm0()),
1038
(1, false) => Some(regs::xmm1()),
1039
(2, false) => Some(regs::xmm2()),
1040
(3, false) => Some(regs::xmm3()),
1041
(4, false) => Some(regs::xmm4()),
1042
(5, false) => Some(regs::xmm5()),
1043
(6, false) => Some(regs::xmm6()),
1044
(7, false) => Some(regs::xmm7()),
1045
(0, true) => Some(regs::xmm0()),
1046
(1, true) => Some(regs::xmm1()),
1047
(2, true) => Some(regs::xmm2()),
1048
(3, true) => Some(regs::xmm3()),
1049
_ => None,
1050
}
1051
}
1052
1053
fn get_intreg_for_retval(
1054
call_conv: CallConv,
1055
flags: &settings::Flags,
1056
intreg_idx: usize,
1057
is_last: bool,
1058
) -> Option<Reg> {
1059
match call_conv {
1060
CallConv::Tail => match intreg_idx {
1061
0 => Some(regs::rax()),
1062
1 => Some(regs::rcx()),
1063
2 => Some(regs::rdx()),
1064
3 => Some(regs::rsi()),
1065
4 => Some(regs::rdi()),
1066
5 => Some(regs::r8()),
1067
6 => Some(regs::r9()),
1068
7 => Some(regs::r10()),
1069
// NB: `r11` is reserved as a scratch register that is
1070
// also part of the clobber set.
1071
// NB: `r15` is reserved as a scratch register.
1072
_ => None,
1073
},
1074
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match intreg_idx {
1075
0 => Some(regs::rax()),
1076
1 => Some(regs::rdx()),
1077
2 if flags.enable_llvm_abi_extensions() => Some(regs::rcx()),
1078
_ => None,
1079
},
1080
CallConv::WindowsFastcall => match intreg_idx {
1081
0 => Some(regs::rax()),
1082
1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
1083
_ => None,
1084
},
1085
1086
CallConv::Winch => is_last.then(|| regs::rax()),
1087
CallConv::Probestack => todo!(),
1088
CallConv::AppleAarch64 => unreachable!(),
1089
}
1090
}
1091
1092
fn get_fltreg_for_retval(call_conv: CallConv, fltreg_idx: usize, is_last: bool) -> Option<Reg> {
1093
match call_conv {
1094
CallConv::Tail => match fltreg_idx {
1095
0 => Some(regs::xmm0()),
1096
1 => Some(regs::xmm1()),
1097
2 => Some(regs::xmm2()),
1098
3 => Some(regs::xmm3()),
1099
4 => Some(regs::xmm4()),
1100
5 => Some(regs::xmm5()),
1101
6 => Some(regs::xmm6()),
1102
7 => Some(regs::xmm7()),
1103
_ => None,
1104
},
1105
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match fltreg_idx {
1106
0 => Some(regs::xmm0()),
1107
1 => Some(regs::xmm1()),
1108
_ => None,
1109
},
1110
CallConv::WindowsFastcall => match fltreg_idx {
1111
0 => Some(regs::xmm0()),
1112
_ => None,
1113
},
1114
CallConv::Winch => is_last.then(|| regs::xmm0()),
1115
CallConv::Probestack => todo!(),
1116
CallConv::AppleAarch64 => unreachable!(),
1117
}
1118
}
1119
1120
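/// Whether `r` is callee-saved under the SysV ABI: RBX, RBP, and R12-R14 always
/// are; R15 is callee-saved only when it is not reserved as the pinned
/// register; all XMM registers are caller-saved.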
fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {
1121
use asm::gpr::enc::*;
1122
1123
match r.class() {
1124
RegClass::Int => match r.hw_enc() {
1125
RBX | RBP | R12 | R13 | R14 => true,
1126
// R15 is the pinned register; if we're using it that way,
1127
// it is effectively globally-allocated, and is not
1128
// callee-saved.
1129
R15 => !enable_pinned_reg,
1130
_ => false,
1131
},
1132
RegClass::Float => false,
1133
RegClass::Vector => unreachable!(),
1134
}
1135
}
1136
1137
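/// Whether `r` is callee-saved under Windows Fastcall: RBX, RBP, RSI, RDI, and
/// R12-R14 always are (R15 only when not pinned), along with XMM6-XMM15.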
fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {
1138
use asm::gpr::enc::*;
1139
use asm::xmm::enc::*;
1140
1141
match r.class() {
1142
RegClass::Int => match r.hw_enc() {
1143
RBX | RBP | RSI | RDI | R12 | R13 | R14 => true,
1144
// See above for SysV: we must treat the pinned reg specially.
1145
R15 => !enable_pinned_reg,
1146
_ => false,
1147
},
1148
RegClass::Float => match r.hw_enc() {
1149
XMM6 | XMM7 | XMM8 | XMM9 | XMM10 | XMM11 | XMM12 | XMM13 | XMM14 | XMM15 => true,
1150
_ => false,
1151
},
1152
RegClass::Vector => unreachable!(),
1153
}
1154
}
1155
1156
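/// Compute the stack space needed to save the given clobbered registers:
/// 8 bytes per integer register and 16 bytes (16-byte aligned) per
/// float/vector register, with the total rounded up to a multiple of 16.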
fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
1157
let mut clobbered_size = 0;
1158
for reg in clobbers {
1159
match reg.to_reg().class() {
1160
RegClass::Int => {
1161
clobbered_size += 8;
1162
}
1163
RegClass::Float => {
1164
clobbered_size = align_to(clobbered_size, 16);
1165
clobbered_size += 16;
1166
}
1167
RegClass::Vector => unreachable!(),
1168
}
1169
}
1170
align_to(clobbered_size, 16)
1171
}
1172
1173
const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
1174
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
1175
pub(crate) const ALL_CLOBBERS: PRegSet = all_clobbers();
1176
1177
const fn windows_clobbers() -> PRegSet {
1178
use asm::gpr::enc::*;
1179
use asm::xmm::enc::*;
1180
1181
PRegSet::empty()
1182
.with(regs::gpr_preg(RAX))
1183
.with(regs::gpr_preg(RCX))
1184
.with(regs::gpr_preg(RDX))
1185
.with(regs::gpr_preg(R8))
1186
.with(regs::gpr_preg(R9))
1187
.with(regs::gpr_preg(R10))
1188
.with(regs::gpr_preg(R11))
1189
.with(regs::fpr_preg(XMM0))
1190
.with(regs::fpr_preg(XMM1))
1191
.with(regs::fpr_preg(XMM2))
1192
.with(regs::fpr_preg(XMM3))
1193
.with(regs::fpr_preg(XMM4))
1194
.with(regs::fpr_preg(XMM5))
1195
}
1196
1197
const fn sysv_clobbers() -> PRegSet {
1198
use asm::gpr::enc::*;
1199
use asm::xmm::enc::*;
1200
1201
PRegSet::empty()
1202
.with(regs::gpr_preg(RAX))
1203
.with(regs::gpr_preg(RCX))
1204
.with(regs::gpr_preg(RDX))
1205
.with(regs::gpr_preg(RSI))
1206
.with(regs::gpr_preg(RDI))
1207
.with(regs::gpr_preg(R8))
1208
.with(regs::gpr_preg(R9))
1209
.with(regs::gpr_preg(R10))
1210
.with(regs::gpr_preg(R11))
1211
.with(regs::fpr_preg(XMM0))
1212
.with(regs::fpr_preg(XMM1))
1213
.with(regs::fpr_preg(XMM2))
1214
.with(regs::fpr_preg(XMM3))
1215
.with(regs::fpr_preg(XMM4))
1216
.with(regs::fpr_preg(XMM5))
1217
.with(regs::fpr_preg(XMM6))
1218
.with(regs::fpr_preg(XMM7))
1219
.with(regs::fpr_preg(XMM8))
1220
.with(regs::fpr_preg(XMM9))
1221
.with(regs::fpr_preg(XMM10))
1222
.with(regs::fpr_preg(XMM11))
1223
.with(regs::fpr_preg(XMM12))
1224
.with(regs::fpr_preg(XMM13))
1225
.with(regs::fpr_preg(XMM14))
1226
.with(regs::fpr_preg(XMM15))
1227
}
1228
1229
/// For calling conventions that clobber all registers.
1230
const fn all_clobbers() -> PRegSet {
1231
use asm::gpr::enc::*;
1232
use asm::xmm::enc::*;
1233
1234
PRegSet::empty()
1235
.with(regs::gpr_preg(RAX))
1236
.with(regs::gpr_preg(RCX))
1237
.with(regs::gpr_preg(RDX))
1238
.with(regs::gpr_preg(RBX))
1239
.with(regs::gpr_preg(RSI))
1240
.with(regs::gpr_preg(RDI))
1241
.with(regs::gpr_preg(R8))
1242
.with(regs::gpr_preg(R9))
1243
.with(regs::gpr_preg(R10))
1244
.with(regs::gpr_preg(R11))
1245
.with(regs::gpr_preg(R12))
1246
.with(regs::gpr_preg(R13))
1247
.with(regs::gpr_preg(R14))
1248
.with(regs::gpr_preg(R15))
1249
.with(regs::fpr_preg(XMM0))
1250
.with(regs::fpr_preg(XMM1))
1251
.with(regs::fpr_preg(XMM2))
1252
.with(regs::fpr_preg(XMM3))
1253
.with(regs::fpr_preg(XMM4))
1254
.with(regs::fpr_preg(XMM5))
1255
.with(regs::fpr_preg(XMM6))
1256
.with(regs::fpr_preg(XMM7))
1257
.with(regs::fpr_preg(XMM8))
1258
.with(regs::fpr_preg(XMM9))
1259
.with(regs::fpr_preg(XMM10))
1260
.with(regs::fpr_preg(XMM11))
1261
.with(regs::fpr_preg(XMM12))
1262
.with(regs::fpr_preg(XMM13))
1263
.with(regs::fpr_preg(XMM14))
1264
.with(regs::fpr_preg(XMM15))
1265
}
1266
1267
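/// Build the register-allocator environment for SysV-style register usage:
/// caller-saved GPRs and XMM0-XMM7 are preferred, callee-saved GPRs and
/// XMM8-XMM15 are non-preferred, and R15 is withheld entirely when it is
/// reserved as the pinned register.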
fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv {
1268
fn preg(r: Reg) -> PReg {
1269
r.to_real_reg().unwrap().into()
1270
}
1271
1272
let mut env = MachineEnv {
1273
preferred_regs_by_class: [
1274
// Preferred GPRs: caller-saved in the SysV ABI.
1275
vec![
1276
preg(regs::rsi()),
1277
preg(regs::rdi()),
1278
preg(regs::rax()),
1279
preg(regs::rcx()),
1280
preg(regs::rdx()),
1281
preg(regs::r8()),
1282
preg(regs::r9()),
1283
preg(regs::r10()),
1284
preg(regs::r11()),
1285
],
1286
// Preferred XMMs: the first 8, which can have smaller encodings
1287
// with AVX instructions.
1288
vec![
1289
preg(regs::xmm0()),
1290
preg(regs::xmm1()),
1291
preg(regs::xmm2()),
1292
preg(regs::xmm3()),
1293
preg(regs::xmm4()),
1294
preg(regs::xmm5()),
1295
preg(regs::xmm6()),
1296
preg(regs::xmm7()),
1297
],
1298
// The Vector Regclass is unused
1299
vec![],
1300
],
1301
non_preferred_regs_by_class: [
1302
// Non-preferred GPRs: callee-saved in the SysV ABI.
1303
vec![
1304
preg(regs::rbx()),
1305
preg(regs::r12()),
1306
preg(regs::r13()),
1307
preg(regs::r14()),
1308
],
1309
// Non-preferred XMMs: the last 8 registers, which can have larger
1310
// encodings with AVX instructions.
1311
vec![
1312
preg(regs::xmm8()),
1313
preg(regs::xmm9()),
1314
preg(regs::xmm10()),
1315
preg(regs::xmm11()),
1316
preg(regs::xmm12()),
1317
preg(regs::xmm13()),
1318
preg(regs::xmm14()),
1319
preg(regs::xmm15()),
1320
],
1321
// The Vector Regclass is unused
1322
vec![],
1323
],
1324
fixed_stack_slots: vec![],
1325
scratch_by_class: [None, None, None],
1326
};
1327
1328
debug_assert_eq!(regs::r15(), regs::pinned_reg());
1329
if !enable_pinned_reg {
1330
env.non_preferred_regs_by_class[0].push(preg(regs::r15()));
1331
}
1332
1333
env
1334
}