GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/isa/x64/inst/mod.rs
1
//! This module defines x86_64-specific machine instruction types.
2
3
pub use emit_state::EmitState;
4
5
use crate::binemit::{Addend, CodeOffset, Reloc};
6
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
7
use crate::isa::x64::abi::X64ABIMachineSpec;
8
use crate::isa::x64::inst::regs::pretty_print_reg;
9
use crate::isa::x64::settings as x64_settings;
10
use crate::isa::{CallConv, FunctionAlignment};
11
use crate::{CodegenError, CodegenResult, settings};
12
use crate::{machinst::*, trace};
13
use alloc::boxed::Box;
14
use alloc::string::{String, ToString};
15
use alloc::vec;
16
use alloc::vec::Vec;
17
use core::fmt::{self, Write};
18
use core::slice;
19
use cranelift_assembler_x64 as asm;
20
use smallvec::{SmallVec, smallvec};
21
22
pub mod args;
23
mod emit;
24
mod emit_state;
25
#[cfg(test)]
26
mod emit_tests;
27
pub mod external;
28
pub mod regs;
29
mod stack_switch;
30
pub mod unwind;
31
32
use args::*;
33
34
//=============================================================================
35
// Instructions (top level): definition
36
37
// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
38
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
39
pub use super::lower::isle::generated_code::MInst as Inst;
40
41
/// Out-of-line data for return-calls, to keep the size of `Inst` down.
42
#[derive(Clone, Debug)]
43
pub struct ReturnCallInfo<T> {
44
/// Where this call is going.
45
pub dest: T,
46
47
/// The size of the argument area for this return-call, potentially smaller than that of the
48
/// caller, but never larger.
49
pub new_stack_arg_size: u32,
50
51
/// The in-register arguments and their constraints.
52
pub uses: CallArgList,
53
54
/// A temporary for use when moving the return address.
55
pub tmp: WritableGpr,
56
}
57
58
#[test]
59
#[cfg(target_pointer_width = "64")]
60
fn inst_size_test() {
61
// This test helps catch unintentional growth in the size
62
// of the `Inst` enum.
63
assert_eq!(48, core::mem::size_of::<Inst>());
64
}
65
66
impl Inst {
67
/// Check if the instruction (or pseudo-instruction) can be emitted on the
68
/// target architecture described by `emit_info`. For non-assembler
69
/// instructions, this assumes a baseline feature set (i.e., 64-bit mode plus
70
/// SSE2 and below).
71
fn is_available(&self, emit_info: &EmitInfo) -> bool {
72
use asm::AvailableFeatures;
73
74
match self {
75
// These (pseudo-)instructions only need the baseline feature set that
76
// Cranelift already requires, so they don't have to be checked.
77
Inst::AtomicRmwSeq { .. }
78
| Inst::CallKnown { .. }
79
| Inst::CallUnknown { .. }
80
| Inst::ReturnCallKnown { .. }
81
| Inst::ReturnCallUnknown { .. }
82
| Inst::CheckedSRemSeq { .. }
83
| Inst::CheckedSRemSeq8 { .. }
84
| Inst::CvtFloatToSintSeq { .. }
85
| Inst::CvtFloatToUintSeq { .. }
86
| Inst::CvtUint64ToFloatSeq { .. }
87
| Inst::JmpCond { .. }
88
| Inst::JmpCondOr { .. }
89
| Inst::WinchJmpIf { .. }
90
| Inst::JmpKnown { .. }
91
| Inst::JmpTableSeq { .. }
92
| Inst::LoadExtName { .. }
93
| Inst::MovFromPReg { .. }
94
| Inst::MovToPReg { .. }
95
| Inst::StackProbeLoop { .. }
96
| Inst::Args { .. }
97
| Inst::Rets { .. }
98
| Inst::StackSwitchBasic { .. }
99
| Inst::TrapIf { .. }
100
| Inst::TrapIfAnd { .. }
101
| Inst::TrapIfOr { .. }
102
| Inst::XmmCmove { .. }
103
| Inst::XmmMinMaxSeq { .. }
104
| Inst::XmmUninitializedValue { .. }
105
| Inst::GprUninitializedValue { .. }
106
| Inst::ElfTlsGetAddr { .. }
107
| Inst::MachOTlsGetAddr { .. }
108
| Inst::CoffTlsGetAddr { .. }
109
| Inst::Unwind { .. }
110
| Inst::DummyUse { .. }
111
| Inst::LabelAddress { .. }
112
| Inst::SequencePoint => true,
113
114
Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => emit_info.cmpxchg16b(),
115
116
Inst::External { inst } => inst.is_available(&emit_info),
117
}
118
}
119
}
120
121
// Handy constructors for Insts.
122
123
impl Inst {
124
pub(crate) fn nop(len: u8) -> Self {
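// The recommended multi-byte NOP encodings cover lengths of 1 through 9
// bytes, hence the range accepted here; `MachInst::gen_nop` below clamps its
// requested size accordingly.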
125
assert!(len > 0 && len <= 9);
126
let inst = match len {
127
1 => asm::inst::nop_1b::new().into(),
128
2 => asm::inst::nop_2b::new().into(),
129
3 => asm::inst::nop_3b::new().into(),
130
4 => asm::inst::nop_4b::new().into(),
131
5 => asm::inst::nop_5b::new().into(),
132
6 => asm::inst::nop_6b::new().into(),
133
7 => asm::inst::nop_7b::new().into(),
134
8 => asm::inst::nop_8b::new().into(),
135
9 => asm::inst::nop_9b::new().into(),
136
_ => unreachable!("nop length must be between 1 and 9"),
137
};
138
Self::External { inst }
139
}
140
141
pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
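// Prefer the sign-extended 8-bit immediate form when the value fits; for
// example, an immediate of 8 selects `addq_mi_sxb` while 0x1000 falls back
// to the 32-bit `addq_mi_sxl` form.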
142
let inst = if let Ok(simm8) = i8::try_from(simm32) {
143
asm::inst::addq_mi_sxb::new(dst, simm8).into()
144
} else {
145
asm::inst::addq_mi_sxl::new(dst, simm32).into()
146
};
147
Inst::External { inst }
148
}
149
150
pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
151
let inst = if let Ok(simm8) = i8::try_from(simm32) {
152
asm::inst::subq_mi_sxb::new(dst, simm8).into()
153
} else {
154
asm::inst::subq_mi_sxl::new(dst, simm32).into()
155
};
156
Inst::External { inst }
157
}
158
159
/// Writes the `simm64` immediate into `dst`.
160
///
161
/// Note that if `dst_size` is less than 64 bits then the upper bits of
162
/// `simm64` are discarded.
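///
/// Illustrative examples of the selection performed below for
/// `OperandSize::Size64` (hypothetical `dst`):
///
/// - `0x7fff_ffff` fits in a `u32`, so a zero-extending `movl` is used.
/// - `0xffff_ffff_8000_0000` is the sign-extension of a 32-bit value, so
///   `movq` with a 32-bit immediate is used.
/// - `0x1234_5678_9abc_def0` requires the full 64-bit `movabsq` form.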
163
pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
164
debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
165
debug_assert!(dst.to_reg().class() == RegClass::Int);
166
let dst = WritableGpr::from_writable_reg(dst).unwrap();
167
let inst = match dst_size {
168
OperandSize::Size64 => match u32::try_from(simm64) {
169
// If `simm64` is zero-extended use `movl` which zeros the
170
// upper bits.
171
Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
172
_ => match i32::try_from(simm64.cast_signed()) {
173
// If `simm64` is sign-extended use `movq`, which sign-extends
174
// into the upper bits.
175
Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
176
// fall back to embedding the entire immediate.
177
_ => asm::inst::movabsq_oi::new(dst, simm64).into(),
178
},
179
},
180
// FIXME: the input to this function is a logical `simm64` stored
181
// as `u64`. That means that ideally what we would do here is cast
182
// the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
183
// that back to `u32`. That would ensure that the immediate loses
184
// no meaning and has the same logical value. Currently though
185
// Cranelift relies on discarding the upper bits because literals
186
// like `0x8000_0000_u64` fail to convert to an `i32`. In theory
187
// the input to this function should change to `i64`. In the
188
// meantime this is documented as discarding the upper bits,
189
// although this is an old function so that's unlikely to help
190
// much.
191
_ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
192
};
193
Inst::External { inst }
194
}
195
196
pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
197
src.assert_regclass_is(RegClass::Int);
198
debug_assert!(dst.to_reg().class() == RegClass::Int);
199
let src = match src {
200
RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
201
RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
202
};
203
let inst = match ext_mode {
204
ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
205
ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
206
ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
207
ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
208
ExtMode::LQ => {
209
// This instruction selection may seem strange but is correct in
210
// 64-bit mode: section 3.4.1.1 of the Intel manual says that
211
// "32-bit operands generate a 32-bit result, zero-extended to a
212
// 64-bit result in the destination general-purpose register."
213
// This is applicable beyond `mov` but we use this fact to
214
// zero-extend `src` into `dst`.
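// For example, `movl %eax, %eax` clears the upper 32 bits of `%rax`.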
215
asm::inst::movl_rm::new(dst, src).into()
216
}
217
};
218
Inst::External { inst }
219
}
220
221
pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
222
src.assert_regclass_is(RegClass::Int);
223
debug_assert!(dst.to_reg().class() == RegClass::Int);
224
let src = match src {
225
RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
226
RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
227
};
228
let inst = match ext_mode {
229
ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
230
ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
231
ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
232
ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
233
ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
234
};
235
Inst::External { inst }
236
}
237
238
/// Compares `src1` against `src2`.
239
pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
240
let inst = match size {
241
OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.cast_unsigned()).into(),
242
OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
243
OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
244
OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
245
};
246
Inst::External { inst }
247
}
248
249
pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
250
Inst::TrapIf { cc, trap_code }
251
}
252
253
pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
254
Inst::CallKnown { info }
255
}
256
257
pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
258
info.dest.assert_regclass_is(RegClass::Int);
259
Inst::CallUnknown { info }
260
}
261
262
pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
263
Inst::JmpKnown { dst }
264
}
265
266
/// Choose which instruction to use for loading a register value from memory. For loads smaller
267
/// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
268
/// [ExtKind::ZeroExtend]); loads that need no extension ignore this.
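///
/// Illustrative uses (hypothetical `addr`, `gpr`, and `xmm` operands),
/// mirroring the selection below:
///
/// - `Inst::load(types::I32, addr, gpr, ExtKind::ZeroExtend)` emits `movl`,
///   which zero-extends into the full 64-bit register.
/// - `Inst::load(types::I64, addr, gpr, ExtKind::None)` emits `movq`.
/// - `Inst::load(types::F32X4, addr, xmm, ExtKind::None)` emits `movups`.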
269
pub(crate) fn load(
270
ty: Type,
271
from_addr: impl Into<SyntheticAmode>,
272
to_reg: Writable<Reg>,
273
ext_kind: ExtKind,
274
) -> Inst {
275
let rc = to_reg.to_reg().class();
276
match rc {
277
RegClass::Int => {
278
let ext_mode = match ty.bytes() {
279
1 => Some(ExtMode::BQ),
280
2 => Some(ExtMode::WQ),
281
4 => Some(ExtMode::LQ),
282
8 => None,
283
_ => unreachable!("the type should never use a scalar load: {}", ty),
284
};
285
if let Some(ext_mode) = ext_mode {
286
// Values smaller than 64 bits must be extended in some way.
287
match ext_kind {
288
ExtKind::SignExtend => {
289
Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
290
}
291
ExtKind::ZeroExtend => {
292
Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
293
}
294
ExtKind::None => {
295
panic!("expected an extension kind for extension mode: {ext_mode:?}")
296
}
297
}
298
} else {
299
// 64-bit values can be moved directly.
300
let from_addr = asm::GprMem::from(from_addr.into());
301
Inst::External {
302
inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
303
}
304
}
305
}
306
RegClass::Float => {
307
let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
308
let from_addr = from_addr.into();
309
let inst = match ty {
310
types::F16 | types::I8X2 => {
311
panic!("loading a f16 or i8x2 requires multiple instructions")
312
}
313
_ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
314
asm::inst::movss_a_m::new(to_reg, from_addr).into()
315
}
316
_ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
317
asm::inst::movsd_a_m::new(to_reg, from_addr).into()
318
}
319
types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
320
types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
321
_ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
322
asm::inst::movdqu_a::new(to_reg, from_addr).into()
323
}
324
_ => unimplemented!("unable to load type: {}", ty),
325
};
326
Inst::External { inst }
327
}
328
RegClass::Vector => unreachable!(),
329
}
330
}
331
332
/// Choose which instruction to use for storing a register value to memory.
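///
/// Illustrative uses (hypothetical `reg` and `addr` operands), mirroring the
/// selection below: storing `types::I32` emits `movl`, `types::I64` emits
/// `movq`, and `types::F32X4` emits `movups`.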
333
pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
334
let rc = from_reg.class();
335
let to_addr = to_addr.into();
336
let inst = match rc {
337
RegClass::Int => {
338
let from_reg = Gpr::unwrap_new(from_reg);
339
match ty {
340
types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
341
types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
342
types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
343
types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
344
_ => unreachable!(),
345
}
346
}
347
RegClass::Float => {
348
let from_reg = Xmm::new(from_reg).unwrap();
349
match ty {
350
types::F16 | types::I8X2 => {
351
panic!("storing a f16 or i8x2 requires multiple instructions")
352
}
353
_ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
354
asm::inst::movss_c_m::new(to_addr, from_reg).into()
355
}
356
_ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
357
asm::inst::movsd_c_m::new(to_addr, from_reg).into()
358
}
359
types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
360
types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
361
_ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
362
asm::inst::movdqu_b::new(to_addr, from_reg).into()
363
}
364
_ => unimplemented!("unable to store type: {}", ty),
365
}
366
}
367
RegClass::Vector => unreachable!(),
368
};
369
Inst::External { inst }
370
}
371
}
372
373
//=============================================================================
374
// Instructions: printing
375
376
impl PrettyPrint for Inst {
377
fn pretty_print(&self, _size: u8) -> String {
378
fn ljustify(s: String) -> String {
379
let w = 7;
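// For example, `ljustify("jmp".to_string())` pads the mnemonic to this
// seven-character opcode column, yielding "jmp    ".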
380
if s.len() >= w {
381
s
382
} else {
383
let need = usize::min(w, w - s.len());
384
s + &format!("{nil: <width$}", nil = "", width = need)
385
}
386
}
387
388
fn ljustify2(s1: String, s2: String) -> String {
389
ljustify(s1 + &s2)
390
}
391
392
match self {
393
Inst::CheckedSRemSeq {
394
size,
395
divisor,
396
dividend_lo,
397
dividend_hi,
398
dst_quotient,
399
dst_remainder,
400
} => {
401
let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
402
let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
403
let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
404
let dst_quotient =
405
pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
406
let dst_remainder =
407
pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
408
format!(
409
"checked_srem_seq {dividend_lo}, {dividend_hi}, \
410
{divisor}, {dst_quotient}, {dst_remainder}",
411
)
412
}
413
414
Inst::CheckedSRemSeq8 {
415
divisor,
416
dividend,
417
dst,
418
} => {
419
let divisor = pretty_print_reg(divisor.to_reg(), 1);
420
let dividend = pretty_print_reg(dividend.to_reg(), 1);
421
let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
422
format!("checked_srem_seq {dividend}, {divisor}, {dst}")
423
}
424
425
Inst::XmmMinMaxSeq {
426
lhs,
427
rhs,
428
dst,
429
is_min,
430
size,
431
} => {
432
let rhs = pretty_print_reg(rhs.to_reg(), 8);
433
let lhs = pretty_print_reg(lhs.to_reg(), 8);
434
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
435
let op = ljustify2(
436
if *is_min {
437
"xmm min seq ".to_string()
438
} else {
439
"xmm max seq ".to_string()
440
},
441
format!("f{}", size.to_bits()),
442
);
443
format!("{op} {lhs}, {rhs}, {dst}")
444
}
445
446
Inst::XmmUninitializedValue { dst } => {
447
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
448
let op = ljustify("uninit".into());
449
format!("{op} {dst}")
450
}
451
452
Inst::GprUninitializedValue { dst } => {
453
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
454
let op = ljustify("uninit".into());
455
format!("{op} {dst}")
456
}
457
458
Inst::CvtUint64ToFloatSeq {
459
src,
460
dst,
461
dst_size,
462
tmp_gpr1,
463
tmp_gpr2,
464
..
465
} => {
466
let src = pretty_print_reg(src.to_reg(), 8);
467
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
468
let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
469
let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
470
let op = ljustify(format!(
471
"u64_to_{}_seq",
472
if *dst_size == OperandSize::Size64 {
473
"f64"
474
} else {
475
"f32"
476
}
477
));
478
format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
479
}
480
481
Inst::CvtFloatToSintSeq {
482
src,
483
dst,
484
src_size,
485
dst_size,
486
tmp_xmm,
487
tmp_gpr,
488
is_saturating,
489
} => {
490
let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
491
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
492
let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
493
let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
494
let op = ljustify(format!(
495
"cvt_float{}_to_sint{}{}_seq",
496
src_size.to_bits(),
497
dst_size.to_bits(),
498
if *is_saturating { "_sat" } else { "" },
499
));
500
format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
501
}
502
503
Inst::CvtFloatToUintSeq {
504
src,
505
dst,
506
src_size,
507
dst_size,
508
tmp_gpr,
509
tmp_xmm,
510
tmp_xmm2,
511
is_saturating,
512
} => {
513
let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
514
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
515
let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
516
let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
517
let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
518
let op = ljustify(format!(
519
"cvt_float{}_to_uint{}{}_seq",
520
src_size.to_bits(),
521
dst_size.to_bits(),
522
if *is_saturating { "_sat" } else { "" },
523
));
524
format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
525
}
526
527
Inst::MovFromPReg { src, dst } => {
528
let src: Reg = (*src).into();
529
let src = pretty_print_reg(src, 8);
530
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
531
let op = ljustify("movq".to_string());
532
format!("{op} {src}, {dst}")
533
}
534
535
Inst::MovToPReg { src, dst } => {
536
let src = pretty_print_reg(src.to_reg(), 8);
537
let dst: Reg = (*dst).into();
538
let dst = pretty_print_reg(dst, 8);
539
let op = ljustify("movq".to_string());
540
format!("{op} {src}, {dst}")
541
}
542
543
Inst::XmmCmove {
544
ty,
545
cc,
546
consequent,
547
alternative,
548
dst,
549
..
550
} => {
551
let size = u8::try_from(ty.bytes()).unwrap();
552
let alternative = pretty_print_reg(alternative.to_reg(), size);
553
let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
554
let consequent = pretty_print_reg(consequent.to_reg(), size);
555
let suffix = match *ty {
556
types::F64 => "sd",
557
types::F32 => "ss",
558
types::F16 => "ss",
559
types::F32X4 => "aps",
560
types::F64X2 => "apd",
561
_ => "dqa",
562
};
563
let cc = cc.invert();
564
format!(
565
"mov{suffix} {alternative}, {dst}; \
566
j{cc} $next; \
567
mov{suffix} {consequent}, {dst}; \
568
$next:"
569
)
570
}
571
572
Inst::StackProbeLoop {
573
tmp,
574
frame_size,
575
guard_size,
576
} => {
577
let tmp = pretty_print_reg(tmp.to_reg(), 8);
578
let op = ljustify("stack_probe_loop".to_string());
579
format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
580
}
581
582
Inst::CallKnown { info } => {
583
let op = ljustify("call".to_string());
584
let try_call = info
585
.try_call_info
586
.as_ref()
587
.map(|tci| pretty_print_try_call(tci))
588
.unwrap_or_default();
589
format!("{op} {:?}{try_call}", info.dest)
590
}
591
592
Inst::CallUnknown { info } => {
593
let dest = info.dest.pretty_print(8);
594
let op = ljustify("call".to_string());
595
let try_call = info
596
.try_call_info
597
.as_ref()
598
.map(|tci| pretty_print_try_call(tci))
599
.unwrap_or_default();
600
format!("{op} *{dest}{try_call}")
601
}
602
603
Inst::ReturnCallKnown { info } => {
604
let ReturnCallInfo {
605
uses,
606
new_stack_arg_size,
607
tmp,
608
dest,
609
} = &**info;
610
let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
611
let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
612
for ret in uses {
613
let preg = pretty_print_reg(ret.preg, 8);
614
let vreg = pretty_print_reg(ret.vreg, 8);
615
write!(&mut s, " {vreg}={preg}").unwrap();
616
}
617
s
618
}
619
620
Inst::ReturnCallUnknown { info } => {
621
let ReturnCallInfo {
622
uses,
623
new_stack_arg_size,
624
tmp,
625
dest,
626
} = &**info;
627
let callee = pretty_print_reg(*dest, 8);
628
let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
629
let mut s =
630
format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
631
for ret in uses {
632
let preg = pretty_print_reg(ret.preg, 8);
633
let vreg = pretty_print_reg(ret.vreg, 8);
634
write!(&mut s, " {vreg}={preg}").unwrap();
635
}
636
s
637
}
638
639
Inst::Args { args } => {
640
let mut s = "args".to_string();
641
for arg in args {
642
let preg = pretty_print_reg(arg.preg, 8);
643
let def = pretty_print_reg(arg.vreg.to_reg(), 8);
644
write!(&mut s, " {def}={preg}").unwrap();
645
}
646
s
647
}
648
649
Inst::Rets { rets } => {
650
let mut s = "rets".to_string();
651
for ret in rets {
652
let preg = pretty_print_reg(ret.preg, 8);
653
let vreg = pretty_print_reg(ret.vreg, 8);
654
write!(&mut s, " {vreg}={preg}").unwrap();
655
}
656
s
657
}
658
659
Inst::StackSwitchBasic {
660
store_context_ptr,
661
load_context_ptr,
662
in_payload0,
663
out_payload0,
664
} => {
665
let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
666
let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
667
let in_payload0 = pretty_print_reg(**in_payload0, 8);
668
let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
669
format!(
670
"{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
671
)
672
}
673
674
Inst::JmpKnown { dst } => {
675
let op = ljustify("jmp".to_string());
676
let dst = dst.to_string();
677
format!("{op} {dst}")
678
}
679
680
Inst::WinchJmpIf { cc, taken } => {
681
let taken = taken.to_string();
682
let op = ljustify2("j".to_string(), cc.to_string());
683
format!("{op} {taken}")
684
}
685
686
Inst::JmpCondOr {
687
cc1,
688
cc2,
689
taken,
690
not_taken,
691
} => {
692
let taken = taken.to_string();
693
let not_taken = not_taken.to_string();
694
let op = ljustify(format!("j{cc1},{cc2}"));
695
format!("{op} {taken}; j {not_taken}")
696
}
697
698
Inst::JmpCond {
699
cc,
700
taken,
701
not_taken,
702
} => {
703
let taken = taken.to_string();
704
let not_taken = not_taken.to_string();
705
let op = ljustify2("j".to_string(), cc.to_string());
706
format!("{op} {taken}; j {not_taken}")
707
}
708
709
Inst::JmpTableSeq {
710
idx, tmp1, tmp2, ..
711
} => {
712
let idx = pretty_print_reg(*idx, 8);
713
let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
714
let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
715
let op = ljustify("br_table".into());
716
format!("{op} {idx}, {tmp1}, {tmp2}")
717
}
718
719
Inst::TrapIf { cc, trap_code, .. } => {
720
format!("j{cc} #trap={trap_code}")
721
}
722
723
Inst::TrapIfAnd {
724
cc1,
725
cc2,
726
trap_code,
727
..
728
} => {
729
let cc1 = cc1.invert();
730
let cc2 = cc2.invert();
731
format!("trap_if_and {cc1}, {cc2}, {trap_code}")
732
}
733
734
Inst::TrapIfOr {
735
cc1,
736
cc2,
737
trap_code,
738
..
739
} => {
740
let cc2 = cc2.invert();
741
format!("trap_if_or {cc1}, {cc2}, {trap_code}")
742
}
743
744
Inst::LoadExtName {
745
dst, name, offset, ..
746
} => {
747
let dst = pretty_print_reg(*dst.to_reg(), 8);
748
let name = name.display(None);
749
let op = ljustify("load_ext_name".into());
750
format!("{op} {name}+{offset}, {dst}")
751
}
752
753
Inst::AtomicRmwSeq { ty, op, .. } => {
754
let ty = ty.bits();
755
format!(
756
"atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
757
)
758
}
759
760
Inst::Atomic128RmwSeq {
761
op,
762
mem,
763
operand_low,
764
operand_high,
765
temp_low,
766
temp_high,
767
dst_old_low,
768
dst_old_high,
769
} => {
770
let operand_low = pretty_print_reg(**operand_low, 8);
771
let operand_high = pretty_print_reg(**operand_high, 8);
772
let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
773
let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
774
let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
775
let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
776
let mem = mem.pretty_print(16);
777
format!(
778
"atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
779
)
780
}
781
782
Inst::Atomic128XchgSeq {
783
mem,
784
operand_low,
785
operand_high,
786
dst_old_low,
787
dst_old_high,
788
} => {
789
let operand_low = pretty_print_reg(**operand_low, 8);
790
let operand_high = pretty_print_reg(**operand_high, 8);
791
let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
792
let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
793
let mem = mem.pretty_print(16);
794
format!(
795
"atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
796
)
797
}
798
799
Inst::ElfTlsGetAddr { symbol, dst } => {
800
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
801
format!("{dst} = elf_tls_get_addr {symbol:?}")
802
}
803
804
Inst::MachOTlsGetAddr { symbol, dst } => {
805
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
806
format!("{dst} = macho_tls_get_addr {symbol:?}")
807
}
808
809
Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
810
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
811
let tmp = tmp.to_reg().to_reg();
812
813
let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
814
if tmp.is_virtual() {
815
let tmp = pretty_print_reg(tmp, 8);
816
write!(&mut s, ", {tmp}").unwrap();
817
};
818
819
s
820
}
821
822
Inst::Unwind { inst } => format!("unwind {inst:?}"),
823
824
Inst::DummyUse { reg } => {
825
let reg = pretty_print_reg(*reg, 8);
826
format!("dummy_use {reg}")
827
}
828
829
Inst::LabelAddress { dst, label } => {
830
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
831
format!("label_address {dst}, {label:?}")
832
}
833
834
Inst::SequencePoint {} => {
835
format!("sequence_point")
836
}
837
838
Inst::External { inst } => {
839
format!("{inst}")
840
}
841
}
842
}
843
}
844
845
fn pretty_print_try_call(info: &TryCallInfo) -> String {
846
format!(
847
"; jmp {:?}; catch [{}]",
848
info.continuation,
849
info.pretty_print_dests()
850
)
851
}
852
853
impl fmt::Debug for Inst {
854
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
855
write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
856
}
857
}
858
859
fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
860
// Note: because we need to statically know the indices of each
861
// reg in the operands list in order to fetch its allocation
862
// later, we put the variable-operand-count bits (the RegMem,
863
// RegMemImm, etc args) last. regalloc2 doesn't care what order
864
// the operands come in; they can be freely reordered.
865
866
// N.B.: we MUST keep the below in careful sync with (i) emission,
867
// in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
868
// method above.
869
match inst {
870
Inst::CheckedSRemSeq {
871
divisor,
872
dividend_lo,
873
dividend_hi,
874
dst_quotient,
875
dst_remainder,
876
..
877
} => {
878
collector.reg_use(divisor);
879
collector.reg_fixed_use(dividend_lo, regs::rax());
880
collector.reg_fixed_use(dividend_hi, regs::rdx());
881
collector.reg_fixed_def(dst_quotient, regs::rax());
882
collector.reg_fixed_def(dst_remainder, regs::rdx());
883
}
884
Inst::CheckedSRemSeq8 {
885
divisor,
886
dividend,
887
dst,
888
..
889
} => {
890
collector.reg_use(divisor);
891
collector.reg_fixed_use(dividend, regs::rax());
892
collector.reg_fixed_def(dst, regs::rax());
893
}
894
Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
895
Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
896
Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
897
collector.reg_use(rhs);
898
collector.reg_use(lhs);
899
collector.reg_reuse_def(dst, 0); // Reuse RHS.
900
}
901
Inst::MovFromPReg { dst, src } => {
902
debug_assert!(dst.to_reg().to_reg().is_virtual());
903
collector.reg_fixed_nonallocatable(*src);
904
collector.reg_def(dst);
905
}
906
Inst::MovToPReg { dst, src } => {
907
debug_assert!(src.to_reg().is_virtual());
908
collector.reg_use(src);
909
collector.reg_fixed_nonallocatable(*dst);
910
}
911
Inst::CvtUint64ToFloatSeq {
912
src,
913
dst,
914
tmp_gpr1,
915
tmp_gpr2,
916
..
917
} => {
918
collector.reg_use(src);
919
collector.reg_early_def(dst);
920
collector.reg_early_def(tmp_gpr1);
921
collector.reg_early_def(tmp_gpr2);
922
}
923
Inst::CvtFloatToSintSeq {
924
src,
925
dst,
926
tmp_xmm,
927
tmp_gpr,
928
..
929
} => {
930
collector.reg_use(src);
931
collector.reg_early_def(dst);
932
collector.reg_early_def(tmp_gpr);
933
collector.reg_early_def(tmp_xmm);
934
}
935
Inst::CvtFloatToUintSeq {
936
src,
937
dst,
938
tmp_gpr,
939
tmp_xmm,
940
tmp_xmm2,
941
..
942
} => {
943
collector.reg_use(src);
944
collector.reg_early_def(dst);
945
collector.reg_early_def(tmp_gpr);
946
collector.reg_early_def(tmp_xmm);
947
collector.reg_early_def(tmp_xmm2);
948
}
949
950
Inst::XmmCmove {
951
consequent,
952
alternative,
953
dst,
954
..
955
} => {
956
collector.reg_use(alternative);
957
collector.reg_reuse_def(dst, 0);
958
collector.reg_use(consequent);
959
}
960
Inst::StackProbeLoop { tmp, .. } => {
961
collector.reg_early_def(tmp);
962
}
963
964
Inst::CallKnown { info } => {
965
// Probestack is special and is only inserted after
966
// regalloc, so we do not need to represent its ABI to the
967
// register allocator. Assert that we don't alter that
968
// arrangement.
969
let CallInfo {
970
uses,
971
defs,
972
clobbers,
973
dest,
974
try_call_info,
975
..
976
} = &mut **info;
977
debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
978
for CallArgPair { vreg, preg } in uses {
979
collector.reg_fixed_use(vreg, *preg);
980
}
981
for CallRetPair { vreg, location } in defs {
982
match location {
983
RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
984
RetLocation::Stack(..) => collector.any_def(vreg),
985
}
986
}
987
collector.reg_clobbers(*clobbers);
988
if let Some(try_call_info) = try_call_info {
989
try_call_info.collect_operands(collector);
990
}
991
}
992
993
Inst::CallUnknown { info } => {
994
let CallInfo {
995
uses,
996
defs,
997
clobbers,
998
callee_conv,
999
dest,
1000
try_call_info,
1001
..
1002
} = &mut **info;
1003
match dest {
1004
RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
1005
// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
1006
// This shouldn't be a fixed register constraint. r10 is caller-saved, so this
1007
// should be safe to use.
1008
collector.reg_fixed_use(reg, regs::r10());
1009
}
1010
_ => dest.get_operands(collector),
1011
}
1012
for CallArgPair { vreg, preg } in uses {
1013
collector.reg_fixed_use(vreg, *preg);
1014
}
1015
for CallRetPair { vreg, location } in defs {
1016
match location {
1017
RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
1018
RetLocation::Stack(..) => collector.any_def(vreg),
1019
}
1020
}
1021
collector.reg_clobbers(*clobbers);
1022
if let Some(try_call_info) = try_call_info {
1023
try_call_info.collect_operands(collector);
1024
}
1025
}
1026
Inst::StackSwitchBasic {
1027
store_context_ptr,
1028
load_context_ptr,
1029
in_payload0,
1030
out_payload0,
1031
} => {
1032
collector.reg_use(load_context_ptr);
1033
collector.reg_use(store_context_ptr);
1034
collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
1035
collector.reg_fixed_def(out_payload0, stack_switch::payload_register());
1036
1037
let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
1038
// The return/payload reg must not be included in the clobber set
1039
clobbers.remove(
1040
stack_switch::payload_register()
1041
.to_real_reg()
1042
.unwrap()
1043
.into(),
1044
);
1045
collector.reg_clobbers(clobbers);
1046
}
1047
1048
Inst::ReturnCallKnown { info } => {
1049
let ReturnCallInfo {
1050
dest, uses, tmp, ..
1051
} = &mut **info;
1052
collector.reg_fixed_def(tmp, regs::r11());
1053
// Same as in the `Inst::CallKnown` branch.
1054
debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
1055
for CallArgPair { vreg, preg } in uses {
1056
collector.reg_fixed_use(vreg, *preg);
1057
}
1058
}
1059
1060
Inst::ReturnCallUnknown { info } => {
1061
let ReturnCallInfo {
1062
dest, uses, tmp, ..
1063
} = &mut **info;
1064
1065
// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
1066
// This shouldn't be a fixed register constraint, but it's not clear how to
1067
// pick a register that won't be clobbered by the callee-save restore code
1068
// emitted with a return_call_indirect. r10 is caller-saved, so this should be
1069
// safe to use.
1070
collector.reg_fixed_use(dest, regs::r10());
1071
1072
collector.reg_fixed_def(tmp, regs::r11());
1073
for CallArgPair { vreg, preg } in uses {
1074
collector.reg_fixed_use(vreg, *preg);
1075
}
1076
}
1077
1078
Inst::JmpTableSeq {
1079
idx, tmp1, tmp2, ..
1080
} => {
1081
collector.reg_use(idx);
1082
collector.reg_early_def(tmp1);
1083
// In the sequence emitted for this pseudoinstruction in emit.rs,
1084
// tmp2 is only written after idx is read, so it doesn't need to be
1085
// an early def.
1086
collector.reg_def(tmp2);
1087
}
1088
1089
Inst::LoadExtName { dst, .. } => {
1090
collector.reg_def(dst);
1091
}
1092
1093
Inst::AtomicRmwSeq {
1094
operand,
1095
temp,
1096
dst_old,
1097
mem,
1098
..
1099
} => {
1100
collector.reg_late_use(operand);
1101
collector.reg_early_def(temp);
1102
// This `fixed_def` is needed because `CMPXCHG` always uses this
1103
// register implicitly.
1104
collector.reg_fixed_def(dst_old, regs::rax());
1105
mem.get_operands_late(collector)
1106
}
1107
1108
Inst::Atomic128RmwSeq {
1109
operand_low,
1110
operand_high,
1111
temp_low,
1112
temp_high,
1113
dst_old_low,
1114
dst_old_high,
1115
mem,
1116
..
1117
} => {
1118
// All registers are collected in the `Late` position so that they don't overlap.
1119
collector.reg_late_use(operand_low);
1120
collector.reg_late_use(operand_high);
1121
collector.reg_fixed_def(temp_low, regs::rbx());
1122
collector.reg_fixed_def(temp_high, regs::rcx());
1123
collector.reg_fixed_def(dst_old_low, regs::rax());
1124
collector.reg_fixed_def(dst_old_high, regs::rdx());
1125
mem.get_operands_late(collector)
1126
}
1127
1128
Inst::Atomic128XchgSeq {
1129
operand_low,
1130
operand_high,
1131
dst_old_low,
1132
dst_old_high,
1133
mem,
1134
..
1135
} => {
1136
// All registers are collected in the `Late` position so that they don't overlap.
1137
collector.reg_fixed_late_use(operand_low, regs::rbx());
1138
collector.reg_fixed_late_use(operand_high, regs::rcx());
1139
collector.reg_fixed_def(dst_old_low, regs::rax());
1140
collector.reg_fixed_def(dst_old_high, regs::rdx());
1141
mem.get_operands_late(collector)
1142
}
1143
1144
Inst::Args { args } => {
1145
for ArgPair { vreg, preg } in args {
1146
collector.reg_fixed_def(vreg, *preg);
1147
}
1148
}
1149
1150
Inst::Rets { rets } => {
1151
// The return value(s) are live-out; we represent this
1152
// with register uses on the return instruction.
1153
for RetPair { vreg, preg } in rets {
1154
collector.reg_fixed_use(vreg, *preg);
1155
}
1156
}
1157
1158
Inst::JmpKnown { .. }
1159
| Inst::WinchJmpIf { .. }
1160
| Inst::JmpCond { .. }
1161
| Inst::JmpCondOr { .. }
1162
| Inst::TrapIf { .. }
1163
| Inst::TrapIfAnd { .. }
1164
| Inst::TrapIfOr { .. } => {
1165
// No registers are used.
1166
}
1167
1168
Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
1169
collector.reg_fixed_def(dst, regs::rax());
1170
// All caller-saves are clobbered.
1171
//
1172
// We use the SysV calling convention here because the
1173
// pseudoinstruction (and relocation that it emits) is specific to
1174
// ELF systems; other x86-64 targets with other conventions (i.e.,
1175
// Windows) use different TLS strategies.
1176
let mut clobbers =
1177
X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
1178
clobbers.remove(regs::gpr_preg(asm::gpr::enc::RAX));
1179
collector.reg_clobbers(clobbers);
1180
}
1181
1182
Inst::CoffTlsGetAddr { dst, tmp, .. } => {
1183
// We also use the gs register. But that register is not allocatable by the
1184
// register allocator, so we don't need to mark it as used here.
1185
1186
// We use %rax to set the address
1187
collector.reg_fixed_def(dst, regs::rax());
1188
1189
// We use %rcx as a temporary variable to load the _tls_index
1190
collector.reg_fixed_def(tmp, regs::rcx());
1191
}
1192
1193
Inst::Unwind { .. } => {}
1194
1195
Inst::DummyUse { reg } => {
1196
collector.reg_use(reg);
1197
}
1198
1199
Inst::LabelAddress { dst, .. } => {
1200
collector.reg_def(dst);
1201
}
1202
1203
Inst::SequencePoint { .. } => {}
1204
1205
Inst::External { inst } => {
1206
inst.visit(&mut external::RegallocVisitor { collector });
1207
}
1208
}
1209
}
1210
1211
//=============================================================================
1212
// Instructions: misc functions and external interface
1213
1214
impl MachInst for Inst {
1215
type ABIMachineSpec = X64ABIMachineSpec;
1216
1217
fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
1218
x64_get_operands(self, collector)
1219
}
1220
1221
fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
1222
use asm::inst::Inst as I;
1223
match self {
1224
// Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
1225
// out the upper 32 bits of the destination. For example, we could
1226
// conceivably use `movl %reg, %reg` to zero out the top 32 bits of
1227
// %reg.
1228
Self::External {
1229
inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
1230
} => match rm64 {
1231
asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
1232
asm::GprMem::Mem(_) => None,
1233
},
1234
Self::External {
1235
inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
1236
} => match rm64 {
1237
asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
1238
asm::GprMem::Mem(_) => None,
1239
},
1240
1241
// Note that `movss_a_r` and `movsd_a_r` are specifically omitted
1242
// here because they only overwrite the low bits in the destination
1243
// register, otherwise preserving the upper bits. That can be used
1244
// for lane-insertion instructions, for example, meaning it's not
1245
// classified as a register move.
1246
//
1247
// Otherwise though all register-to-register movement instructions
1248
// which move 128-bits are registered as moves.
1249
Self::External {
1250
inst:
1251
I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
1252
| I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
1253
| I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
1254
| I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
1255
| I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
1256
| I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
1257
} => match xmm_m128 {
1258
asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
1259
asm::XmmMem::Mem(_) => None,
1260
},
1261
// In addition to the "A" format of instructions above, also
1262
// recognize the "B" format which, while it can be used for stores,
1263
// can also be used for register moves.
1264
Self::External {
1265
inst:
1266
I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
1267
| I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
1268
| I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
1269
| I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
1270
| I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
1271
| I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
1272
} => match xmm_m128 {
1273
asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
1274
asm::XmmMem::Mem(_) => None,
1275
},
1276
_ => None,
1277
}
1278
}
1279
1280
fn is_included_in_clobbers(&self) -> bool {
1281
match self {
1282
&Inst::Args { .. } => false,
1283
_ => true,
1284
}
1285
}
1286
1287
fn is_trap(&self) -> bool {
1288
match self {
1289
Self::External {
1290
inst: asm::inst::Inst::ud2_zo(..),
1291
} => true,
1292
_ => false,
1293
}
1294
}
1295
1296
fn is_args(&self) -> bool {
1297
match self {
1298
Self::Args { .. } => true,
1299
_ => false,
1300
}
1301
}
1302
1303
fn call_type(&self) -> CallType {
1304
match self {
1305
Inst::CallKnown { .. }
1306
| Inst::CallUnknown { .. }
1307
| Inst::ElfTlsGetAddr { .. }
1308
| Inst::MachOTlsGetAddr { .. } => CallType::Regular,
1309
1310
Inst::ReturnCallKnown { .. } | Inst::ReturnCallUnknown { .. } => CallType::TailCall,
1311
1312
_ => CallType::None,
1313
}
1314
}
1315
1316
fn is_term(&self) -> MachTerminator {
1317
match self {
1318
// Interesting cases.
1319
&Self::Rets { .. } => MachTerminator::Ret,
1320
&Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
1321
MachTerminator::RetCall
1322
}
1323
&Self::JmpKnown { .. } => MachTerminator::Branch,
1324
&Self::JmpCond { .. } => MachTerminator::Branch,
1325
&Self::JmpCondOr { .. } => MachTerminator::Branch,
1326
&Self::JmpTableSeq { .. } => MachTerminator::Branch,
1327
&Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1328
&Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
1329
MachTerminator::Branch
1330
}
1331
// All other cases are boring.
1332
_ => MachTerminator::None,
1333
}
1334
}
1335
1336
fn is_low_level_branch(&self) -> bool {
1337
match self {
1338
&Self::WinchJmpIf { .. } => true,
1339
_ => false,
1340
}
1341
}
1342
1343
fn is_mem_access(&self) -> bool {
1344
panic!("TODO FILL ME OUT")
1345
}
1346
1347
fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
1348
trace!(
1349
"Inst::gen_move {:?} -> {:?} (type: {:?})",
1350
src_reg,
1351
dst_reg.to_reg(),
1352
ty
1353
);
1354
let rc_dst = dst_reg.to_reg().class();
1355
let rc_src = src_reg.class();
1356
// If this isn't true, we have gone way off the rails.
1357
debug_assert!(rc_dst == rc_src);
1358
let inst = match rc_dst {
1359
RegClass::Int => {
1360
asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
1361
.into()
1362
}
1363
RegClass::Float => {
1364
// The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
1365
// doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
1366
// those, which may write more lanes than we need, but are specified to have
1367
// zero-latency.
1368
let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
1369
let src_reg = Xmm::new(src_reg).unwrap();
1370
match ty {
1371
types::F16 | types::F32 | types::F64 | types::F32X4 => {
1372
asm::inst::movaps_a::new(dst_reg, src_reg).into()
1373
}
1374
types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
1375
_ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
1376
asm::inst::movdqa_a::new(dst_reg, src_reg).into()
1377
}
1378
_ => unimplemented!("unable to move type: {}", ty),
1379
}
1380
}
1381
RegClass::Vector => unreachable!(),
1382
};
1383
Inst::External { inst }
1384
}
1385
1386
fn gen_nop(preferred_size: usize) -> Inst {
1387
Inst::nop(core::cmp::min(preferred_size, 9) as u8)
1388
}
1389
1390
fn gen_nop_units() -> Vec<Vec<u8>> {
1391
vec![
1392
// Standard 1-byte NOP.
1393
vec![0x90],
1394
// 5-byte NOP useful for patching out patchable calls.
1395
vec![0x0f, 0x1f, 0x44, 0x00, 0x00],
1396
]
1397
}
1398
1399
fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
1400
match ty {
1401
types::I8 => Ok((&[RegClass::Int], &[types::I8])),
1402
types::I16 => Ok((&[RegClass::Int], &[types::I16])),
1403
types::I32 => Ok((&[RegClass::Int], &[types::I32])),
1404
types::I64 => Ok((&[RegClass::Int], &[types::I64])),
1405
types::F16 => Ok((&[RegClass::Float], &[types::F16])),
1406
types::F32 => Ok((&[RegClass::Float], &[types::F32])),
1407
types::F64 => Ok((&[RegClass::Float], &[types::F64])),
1408
types::F128 => Ok((&[RegClass::Float], &[types::F128])),
1409
types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
1410
_ if ty.is_vector() && ty.bits() <= 128 => {
1411
let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
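// Pick the i8 vector type with the same byte width: widths of 2/4/8/16
// bytes have `ilog2()` values of 1/2/3/4, indexing I8X2/I8X4/I8X8/I8X16
// respectively.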
1412
Ok((
1413
&[RegClass::Float],
1414
slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
1415
))
1416
}
1417
_ => Err(CodegenError::Unsupported(format!(
1418
"Unexpected SSA-value type: {ty}"
1419
))),
1420
}
1421
}
1422
1423
fn canonical_type_for_rc(rc: RegClass) -> Type {
1424
match rc {
1425
RegClass::Float => types::I8X16,
1426
RegClass::Int => types::I64,
1427
RegClass::Vector => unreachable!(),
1428
}
1429
}
1430
1431
fn gen_jump(label: MachLabel) -> Inst {
1432
Inst::jmp_known(label)
1433
}
1434
1435
fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
1436
Some(Inst::imm(OperandSize::Size64, value, dst))
1437
}
1438
1439
fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
1440
let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
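// For example, an f64 value of 1.0 is materialized as its bit pattern
// 0x3ff0_0000_0000_0000 in `tmp` and then moved into the XMM destination by
// the `movq` below.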
1441
let gpr_to_xmm = Inst::External {
1442
inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
1443
};
1444
smallvec![imm_to_gpr, gpr_to_xmm]
1445
}
1446
1447
fn gen_dummy_use(reg: Reg) -> Self {
1448
Inst::DummyUse { reg }
1449
}
1450
1451
fn worst_case_size() -> CodeOffset {
1452
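// The longest legal x86-64 instruction encoding is 15 bytes.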
15
1453
}
1454
1455
fn ref_type_regclass(_: &settings::Flags) -> RegClass {
1456
RegClass::Int
1457
}
1458
1459
fn is_safepoint(&self) -> bool {
1460
match self {
1461
Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
1462
_ => false,
1463
}
1464
}
1465
1466
fn function_alignment() -> FunctionAlignment {
1467
FunctionAlignment {
1468
minimum: 1,
1469
// Change the alignment from 16-bytes to 32-bytes for better performance.
1470
// fix-8573: https://github.com/bytecodealliance/wasmtime/issues/8573
1471
preferred: 32,
1472
}
1473
}
1474
1475
type LabelUse = LabelUse;
1476
1477
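// 0x0f, 0x0b encodes `ud2`, matching the `is_trap` check above.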
const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
1478
}
1479
1480
/// Constant state used during emissions of a sequence of instructions.
1481
pub struct EmitInfo {
1482
pub(super) flags: settings::Flags,
1483
isa_flags: x64_settings::Flags,
1484
}
1485
1486
impl EmitInfo {
1487
/// Create a constant state for emission of instructions.
1488
pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
1489
Self { flags, isa_flags }
1490
}
1491
}
1492
1493
impl asm::AvailableFeatures for &EmitInfo {
1494
fn _64b(&self) -> bool {
1495
// Currently, this x64 backend always assumes 64-bit mode.
1496
true
1497
}
1498
1499
fn compat(&self) -> bool {
1500
// For 32-bit compatibility mode, see
1501
// https://github.com/bytecodealliance/wasmtime/issues/1980 (TODO).
1502
false
1503
}
1504
1505
fn sse(&self) -> bool {
1506
// Currently, this x64 backend always assumes SSE.
1507
true
1508
}
1509
1510
fn sse2(&self) -> bool {
1511
// Currently, this x64 backend always assumes SSE2.
1512
true
1513
}
1514
1515
fn sse3(&self) -> bool {
1516
self.isa_flags.has_sse3()
1517
}
1518
1519
fn ssse3(&self) -> bool {
1520
self.isa_flags.has_ssse3()
1521
}
1522
1523
fn sse41(&self) -> bool {
1524
self.isa_flags.has_sse41()
1525
}
1526
1527
fn sse42(&self) -> bool {
1528
self.isa_flags.has_sse42()
1529
}
1530
1531
fn bmi1(&self) -> bool {
1532
self.isa_flags.has_bmi1()
1533
}
1534
1535
fn bmi2(&self) -> bool {
1536
self.isa_flags.has_bmi2()
1537
}
1538
1539
fn lzcnt(&self) -> bool {
1540
self.isa_flags.has_lzcnt()
1541
}
1542
1543
fn popcnt(&self) -> bool {
1544
self.isa_flags.has_popcnt()
1545
}
1546
1547
fn avx(&self) -> bool {
1548
self.isa_flags.has_avx()
1549
}
1550
1551
fn avx2(&self) -> bool {
1552
self.isa_flags.has_avx2()
1553
}
1554
1555
fn avx512f(&self) -> bool {
1556
self.isa_flags.has_avx512f()
1557
}
1558
1559
fn avx512vl(&self) -> bool {
1560
self.isa_flags.has_avx512vl()
1561
}
1562
1563
fn cmpxchg16b(&self) -> bool {
1564
self.isa_flags.has_cmpxchg16b()
1565
}
1566
1567
fn fma(&self) -> bool {
1568
self.isa_flags.has_fma()
1569
}
1570
1571
fn avx512dq(&self) -> bool {
1572
self.isa_flags.has_avx512dq()
1573
}
1574
1575
fn avx512bitalg(&self) -> bool {
1576
self.isa_flags.has_avx512bitalg()
1577
}
1578
1579
fn avx512vbmi(&self) -> bool {
1580
self.isa_flags.has_avx512vbmi()
1581
}
1582
}
1583
1584
impl MachInstEmit for Inst {
1585
type State = EmitState;
1586
type Info = EmitInfo;
1587
1588
fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
1589
emit::emit(self, sink, info, state);
1590
}
1591
1592
fn pretty_print_inst(&self, _: &mut Self::State) -> String {
1593
PrettyPrint::pretty_print(self, 0)
1594
}
1595
}
1596
1597
/// A label-use (internal relocation) in generated code.
1598
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1599
pub enum LabelUse {
1600
/// A 32-bit offset from the location of the relocation itself, added to the existing value at that
1601
/// location. Used for control flow instructions which consider an offset from the start of the
1602
/// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
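///
/// Worked example (illustrative): if the 4-byte offset field sits at code
/// offset 0x10 and the target label is at offset 0x40, then with a zero
/// addend the patched value is 0x40 - 0x10 - 4 = 0x2c, i.e. the displacement
/// from the start of the next instruction.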
1603
JmpRel32,
1604
1605
/// A 32-bit offset from the location of the relocation itself, added to the existing value at that
1606
/// location.
1607
PCRel32,
1608
}
1609
1610
impl MachInstLabelUse for LabelUse {
1611
const ALIGN: CodeOffset = 1;
1612
1613
fn max_pos_range(self) -> CodeOffset {
1614
match self {
1615
LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
1616
}
1617
}
1618
1619
fn max_neg_range(self) -> CodeOffset {
1620
match self {
1621
LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
1622
}
1623
}
1624
1625
fn patch_size(self) -> CodeOffset {
1626
match self {
1627
LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
1628
}
1629
}
1630
1631
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
1632
let pc_rel = (label_offset as i64) - (use_offset as i64);
1633
debug_assert!(pc_rel <= self.max_pos_range() as i64);
1634
debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
1635
let pc_rel = pc_rel as u32;
1636
match self {
1637
LabelUse::JmpRel32 => {
1638
let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1639
let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
1640
buffer.copy_from_slice(&value.to_le_bytes()[..]);
1641
}
1642
LabelUse::PCRel32 => {
1643
let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1644
let value = pc_rel.wrapping_add(addend);
1645
buffer.copy_from_slice(&value.to_le_bytes()[..]);
1646
}
1647
}
1648
}
1649
1650
fn supports_veneer(self) -> bool {
1651
match self {
1652
LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
1653
}
1654
}
1655
1656
fn veneer_size(self) -> CodeOffset {
1657
match self {
1658
LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
1659
}
1660
}
1661
1662
fn worst_case_veneer_size() -> CodeOffset {
1663
0
1664
}
1665
1666
fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
1667
match self {
1668
LabelUse::JmpRel32 | LabelUse::PCRel32 => {
1669
panic!("Veneer not supported for JumpRel32 label-use.");
1670
}
1671
}
1672
}
1673
1674
fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
1675
match (reloc, addend) {
1676
(Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
1677
_ => None,
1678
}
1679
}
1680
}
1681
1682