GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/isa/x64/inst/mod.rs
//! This module defines x86_64-specific machine instruction types.
pub use emit_state::EmitState;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
use crate::isa::x64::abi::X64ABIMachineSpec;
use crate::isa::x64::inst::regs::pretty_print_reg;
use crate::isa::x64::settings as x64_settings;
use crate::isa::{CallConv, FunctionAlignment};
use crate::{CodegenError, CodegenResult, settings};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use core::slice;
use cranelift_assembler_x64 as asm;
use smallvec::{SmallVec, smallvec};
use std::fmt::{self, Write};
use std::string::{String, ToString};

pub mod args;
mod emit;
mod emit_state;
#[cfg(test)]
mod emit_tests;
pub mod external;
pub mod regs;
mod stack_switch;
pub mod unwind;
use args::*;

//=============================================================================
// Instructions (top level): definition

// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
pub use super::lower::isle::generated_code::MInst as Inst;

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}

#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // This test helps catch any unintentional growth in the size of the
    // `Inst` enum.
    assert_eq!(48, std::mem::size_of::<Inst>());
}
impl Inst {
    /// Check whether the instruction (or pseudo-instruction) can be emitted
    /// for the target architecture described by `emit_info`. For non-assembler
    /// instructions, this assumes a baseline feature set (i.e., 64-bit AND SSE2
    /// and below).
    fn is_available(&self, emit_info: &EmitInfo) -> bool {
        use asm::AvailableFeatures;

        match self {
            // These instructions are part of SSE2, which is a basic requirement
            // in Cranelift, and don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::LoadExtName { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::LabelAddress { .. } => true,

            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => emit_info.cmpxchg16b(),

            Inst::External { inst } => inst.is_available(&emit_info),
        }
    }
}
// Handy constructors for Insts.

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        assert!(len > 0 && len <= 9);
        let inst = match len {
            1 => asm::inst::nop_1b::new().into(),
            2 => asm::inst::nop_2b::new().into(),
            3 => asm::inst::nop_3b::new().into(),
            4 => asm::inst::nop_4b::new().into(),
            5 => asm::inst::nop_5b::new().into(),
            6 => asm::inst::nop_6b::new().into(),
            7 => asm::inst::nop_7b::new().into(),
            8 => asm::inst::nop_8b::new().into(),
            9 => asm::inst::nop_9b::new().into(),
            _ => unreachable!("nop length must be between 1 and 9"),
        };
        Self::External { inst }
    }
    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::addq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::addq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::subq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::subq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }
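    // Illustration (editorial note, not from upstream): these helpers pick the
    // sign-extended-byte form when the immediate fits in an `i8`, e.g.
    // `Inst::addq_mi(dst, 16)` selects `addq_mi_sxb`, while
    // `Inst::addq_mi(dst, 1000)` falls back to `addq_mi_sxl`.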
    /// Writes the `simm64` immediate into `dst`.
    ///
    /// Note that if `dst_size` is less than 64 bits then the upper bits of
    /// `simm64` are discarded and the destination is zero-extended.
    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        let inst = match dst_size {
            OperandSize::Size64 => match u32::try_from(simm64) {
                // If `simm64` fits when zero-extended, use `movl`, which zeroes
                // the upper bits.
                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
                _ => match i32::try_from(simm64.cast_signed()) {
                    // If `simm64` fits when sign-extended, use `movq`, which
                    // sign-extends into the upper bits.
                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
                    // Fall back to embedding the entire immediate.
                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
                },
            },
            // FIXME: the input to this function is a logical `simm64` stored
            // as `u64`. That means that ideally what we would do here is cast
            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
            // that back to `u32`. That would ensure that the immediate loses
            // no meaning and has the same logical value. Currently though
            // Cranelift relies on discarding the upper bits because literals
            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
            // the input to this function should change to `i64`. In the
            // meantime this is documented as discarding the upper bits,
            // although this is an old function so that's unlikely to help
            // much.
            _ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
        };
        Inst::External { inst }
    }
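    // Illustration (editorial note, not from upstream): for `OperandSize::Size64`,
    // `Inst::imm` picks the shortest encoding that preserves the value:
    // `0x8000_0000` fits in a zero-extended `u32` and becomes `movl`;
    // `0xffff_ffff_ffff_ffff` (i.e. -1) fits in a sign-extended `i32` and
    // becomes `movq_mi_sxl`; `0x1_0000_0000` needs the full `movabsq` form.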
    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        Inst::External { inst }
    }

    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        Inst::External { inst }
    }
    /// Compares `src1` against `src2`
    pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
        let inst = match size {
            OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.cast_unsigned()).into(),
            OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
            OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
            OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
        };
        Inst::External { inst }
    }

    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
        Inst::TrapIf { cc, trap_code }
    }

    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
        Inst::CallKnown { info }
    }

    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
        info.dest.assert_regclass_is(RegClass::Int);
        Inst::CallUnknown { info }
    }

    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
        Inst::JmpKnown { dst }
    }

    /// Choose which instruction to use for loading a register value from memory. For loads smaller
    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
    pub(crate) fn load(
        ty: Type,
        from_addr: impl Into<SyntheticAmode>,
        to_reg: Writable<Reg>,
        ext_kind: ExtKind,
    ) -> Inst {
        let rc = to_reg.to_reg().class();
        match rc {
            RegClass::Int => {
                let ext_mode = match ty.bytes() {
                    1 => Some(ExtMode::BQ),
                    2 => Some(ExtMode::WQ),
                    4 => Some(ExtMode::LQ),
                    8 => None,
                    _ => unreachable!("the type should never use a scalar load: {}", ty),
                };
                if let Some(ext_mode) = ext_mode {
                    // Values smaller than 64 bits must be extended in some way.
                    match ext_kind {
                        ExtKind::SignExtend => {
                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::ZeroExtend => {
                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::None => {
                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
                        }
                    }
                } else {
                    // 64-bit values can be moved directly.
                    let from_addr = asm::GprMem::from(from_addr.into());
                    Inst::External {
                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
                    }
                }
            }
            RegClass::Float => {
                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
                let from_addr = from_addr.into();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("loading a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
                    }
                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
                    }
                    _ => unimplemented!("unable to load type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }
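    // Illustration (editorial note, not from upstream): an 8-, 16-, or 32-bit
    // integer load must state how to fill the upper bits, e.g.
    // `Inst::load(types::I32, addr, dst, ExtKind::ZeroExtend)` becomes a
    // `movl` (which zero-extends to 64 bits), whereas passing `ExtKind::None`
    // for such a type panics; 64-bit integer loads lower directly to `movq`.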
    /// Choose which instruction to use for storing a register value to memory.
    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
        let rc = from_reg.class();
        let to_addr = to_addr.into();
        let inst = match rc {
            RegClass::Int => {
                let from_reg = Gpr::unwrap_new(from_reg);
                match ty {
                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
                    _ => unreachable!(),
                }
            }
            RegClass::Float => {
                let from_reg = Xmm::new(from_reg).unwrap();
                match ty {
                    types::F16 | types::I8X2 => {
                        panic!("storing a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
                    }
                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
                    }
                    _ => unimplemented!("unable to store type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }
}
//=============================================================================
// Instructions: printing

impl PrettyPrint for Inst {
    fn pretty_print(&self, _size: u8) -> String {
        fn ljustify(s: String) -> String {
            let w = 7;
            if s.len() >= w {
                s
            } else {
                let need = usize::min(w, w - s.len());
                s + &format!("{nil: <width$}", nil = "", width = need)
            }
        }

        fn ljustify2(s1: String, s2: String) -> String {
            ljustify(s1 + &s2)
        }
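        // Illustration (editorial note, not from upstream): `ljustify` pads
        // mnemonics to a 7-character column, so `ljustify("jmp".to_string())`
        // yields "jmp    " while names of 7 or more characters are unchanged.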
        match self {
            Inst::CheckedSRemSeq {
                size,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
                let dst_remainder =
                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
                format!(
                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
                     {divisor}, {dst_quotient}, {dst_remainder}",
                )
            }

            Inst::CheckedSRemSeq8 {
                divisor,
                dividend,
                dst,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), 1);
                let dividend = pretty_print_reg(dividend.to_reg(), 1);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
            }

            Inst::XmmMinMaxSeq {
                lhs,
                rhs,
                dst,
                is_min,
                size,
            } => {
                let rhs = pretty_print_reg(rhs.to_reg(), 8);
                let lhs = pretty_print_reg(lhs.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify2(
                    if *is_min {
                        "xmm min seq ".to_string()
                    } else {
                        "xmm max seq ".to_string()
                    },
                    format!("f{}", size.to_bits()),
                );
                format!("{op} {lhs}, {rhs}, {dst}")
            }

            Inst::XmmUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::GprUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::CvtUint64ToFloatSeq {
                src,
                dst,
                dst_size,
                tmp_gpr1,
                tmp_gpr2,
                ..
            } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "u64_to_{}_seq",
                    if *dst_size == OperandSize::Size64 {
                        "f64"
                    } else {
                        "f32"
                    }
                ));
                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
            }
            Inst::CvtFloatToSintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_xmm,
                tmp_gpr,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_sint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
            }

            Inst::CvtFloatToUintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_gpr,
                tmp_xmm,
                tmp_xmm2,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_uint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
            }

            Inst::MovFromPReg { src, dst } => {
                let src: Reg = (*src).into();
                let src = pretty_print_reg(src, 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::MovToPReg { src, dst } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst: Reg = (*dst).into();
                let dst = pretty_print_reg(dst, 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmCmove {
                ty,
                cc,
                consequent,
                alternative,
                dst,
                ..
            } => {
                let size = u8::try_from(ty.bytes()).unwrap();
                let alternative = pretty_print_reg(alternative.to_reg(), size);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
                let consequent = pretty_print_reg(consequent.to_reg(), size);
                let suffix = match *ty {
                    types::F64 => "sd",
                    types::F32 => "ss",
                    types::F16 => "ss",
                    types::F32X4 => "aps",
                    types::F64X2 => "apd",
                    _ => "dqa",
                };
                let cc = cc.invert();
                format!(
                    "mov{suffix} {alternative}, {dst}; \
                     j{cc} $next; \
                     mov{suffix} {consequent}, {dst}; \
                     $next:"
                )
            }
            Inst::StackProbeLoop {
                tmp,
                frame_size,
                guard_size,
            } => {
                let tmp = pretty_print_reg(tmp.to_reg(), 8);
                let op = ljustify("stack_probe_loop".to_string());
                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
            }

            Inst::CallKnown { info } => {
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} {:?}{try_call}", info.dest)
            }

            Inst::CallUnknown { info } => {
                let dest = info.dest.pretty_print(8);
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} *{dest}{try_call}")
            }

            Inst::ReturnCallKnown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::ReturnCallUnknown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let callee = pretty_print_reg(*dest, 8);
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s =
                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::Args { args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = pretty_print_reg(arg.preg, 8);
                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }

            Inst::Rets { rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::StackSwitchBasic {
                store_context_ptr,
                load_context_ptr,
                in_payload0,
                out_payload0,
            } => {
                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
                let in_payload0 = pretty_print_reg(**in_payload0, 8);
                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
                format!(
                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
                )
            }
            Inst::JmpKnown { dst } => {
                let op = ljustify("jmp".to_string());
                let dst = dst.to_string();
                format!("{op} {dst}")
            }

            Inst::WinchJmpIf { cc, taken } => {
                let taken = taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}")
            }

            Inst::JmpCondOr {
                cc1,
                cc2,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify(format!("j{cc1},{cc2}"));
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpCond {
                cc,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpTableSeq {
                idx, tmp1, tmp2, ..
            } => {
                let idx = pretty_print_reg(*idx, 8);
                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
                let op = ljustify("br_table".into());
                format!("{op} {idx}, {tmp1}, {tmp2}")
            }

            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{cc} #trap={trap_code}")
            }

            Inst::TrapIfAnd {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc1 = cc1.invert();
                let cc2 = cc2.invert();
                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
            }

            Inst::TrapIfOr {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc2 = cc2.invert();
                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
            }

            Inst::LoadExtName {
                dst, name, offset, ..
            } => {
                let dst = pretty_print_reg(*dst.to_reg(), 8);
                let name = name.display(None);
                let op = ljustify("load_ext_name".into());
                format!("{op} {name}+{offset}, {dst}")
            }

            Inst::AtomicRmwSeq { ty, op, .. } => {
                let ty = ty.bits();
                format!(
                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
                )
            }

            Inst::Atomic128RmwSeq {
                op,
                mem,
                operand_low,
                operand_high,
                temp_low,
                temp_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
                )
            }

            Inst::Atomic128XchgSeq {
                mem,
                operand_low,
                operand_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
                )
            }

            Inst::ElfTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = elf_tls_get_addr {symbol:?}")
            }

            Inst::MachOTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = macho_tls_get_addr {symbol:?}")
            }

            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let tmp = tmp.to_reg().to_reg();

                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
                if tmp.is_virtual() {
                    let tmp = pretty_print_reg(tmp, 8);
                    write!(&mut s, ", {tmp}").unwrap();
                };

                s
            }

            Inst::Unwind { inst } => format!("unwind {inst:?}"),

            Inst::DummyUse { reg } => {
                let reg = pretty_print_reg(*reg, 8);
                format!("dummy_use {reg}")
            }

            Inst::LabelAddress { dst, label } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("label_address {dst}, {label:?}")
            }

            Inst::External { inst } => {
                format!("{inst}")
            }
        }
    }
}
fn pretty_print_try_call(info: &TryCallInfo) -> String {
    format!(
        "; jmp {:?}; catch [{}]",
        info.continuation,
        info.pretty_print_dests()
    )
}

impl fmt::Debug for Inst {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
    }
}
fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    // Note: because we need to statically know the indices of each
    // reg in the operands list in order to fetch its allocation
    // later, we put the variable-operand-count bits (the RegMem,
    // RegMemImm, etc args) last. regalloc2 doesn't care what order
    // the operands come in; they can be freely reordered.

    // N.B.: we MUST keep the below in careful sync with (i) emission,
    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
    // method above.
    match inst {
        Inst::CheckedSRemSeq {
            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend_lo, regs::rax());
            collector.reg_fixed_use(dividend_hi, regs::rdx());
            collector.reg_fixed_def(dst_quotient, regs::rax());
            collector.reg_fixed_def(dst_remainder, regs::rdx());
        }
        Inst::CheckedSRemSeq8 {
            divisor,
            dividend,
            dst,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend, regs::rax());
            collector.reg_fixed_def(dst, regs::rax());
        }
        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
            collector.reg_use(rhs);
            collector.reg_use(lhs);
            collector.reg_reuse_def(dst, 0); // Reuse RHS.
        }
        Inst::MovFromPReg { dst, src } => {
            debug_assert!(dst.to_reg().to_reg().is_virtual());
            collector.reg_fixed_nonallocatable(*src);
            collector.reg_def(dst);
        }
        Inst::MovToPReg { dst, src } => {
            debug_assert!(src.to_reg().is_virtual());
            collector.reg_use(src);
            collector.reg_fixed_nonallocatable(*dst);
        }
        Inst::CvtUint64ToFloatSeq {
            src,
            dst,
            tmp_gpr1,
            tmp_gpr2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr1);
            collector.reg_early_def(tmp_gpr2);
        }
        Inst::CvtFloatToSintSeq {
            src,
            dst,
            tmp_xmm,
            tmp_gpr,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
        }
        Inst::CvtFloatToUintSeq {
            src,
            dst,
            tmp_gpr,
            tmp_xmm,
            tmp_xmm2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
            collector.reg_early_def(tmp_xmm2);
        }
        Inst::XmmCmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(consequent);
        }
        Inst::StackProbeLoop { tmp, .. } => {
            collector.reg_early_def(tmp);
        }

        Inst::CallKnown { info } => {
            // Probestack is special and is only inserted after
            // regalloc, so we do not need to represent its ABI to the
            // register allocator. Assert that we don't alter that
            // arrangement.
            let CallInfo {
                uses,
                defs,
                clobbers,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }

        Inst::CallUnknown { info } => {
            let CallInfo {
                uses,
                defs,
                clobbers,
                callee_conv,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            match dest {
                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
                    // should be safe to use.
                    collector.reg_fixed_use(reg, regs::r10());
                }
                _ => dest.get_operands(collector),
            }
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::StackSwitchBasic {
            store_context_ptr,
            load_context_ptr,
            in_payload0,
            out_payload0,
        } => {
            collector.reg_use(load_context_ptr);
            collector.reg_use(store_context_ptr);
            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());

            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
            // The return/payload reg must not be included in the clobber set
            clobbers.remove(
                stack_switch::payload_register()
                    .to_real_reg()
                    .unwrap()
                    .into(),
            );
            collector.reg_clobbers(clobbers);
        }
        Inst::ReturnCallKnown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;
            collector.reg_fixed_def(tmp, regs::r11());
            // Same as in the `Inst::CallKnown` branch.
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::ReturnCallUnknown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;

            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to
            // pick a register that won't be clobbered by the callee-save restore code
            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
            // safe to use.
            collector.reg_fixed_use(dest, regs::r10());

            collector.reg_fixed_def(tmp, regs::r11());
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpTableSeq {
            idx, tmp1, tmp2, ..
        } => {
            collector.reg_use(idx);
            collector.reg_early_def(tmp1);
            // In the sequence emitted for this pseudoinstruction in emit.rs,
            // tmp2 is only written after idx is read, so it doesn't need to be
            // an early def.
            collector.reg_def(tmp2);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::AtomicRmwSeq {
            operand,
            temp,
            dst_old,
            mem,
            ..
        } => {
            collector.reg_late_use(operand);
            collector.reg_early_def(temp);
            // This `fixed_def` is needed because `CMPXCHG` always uses this
            // register implicitly.
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128RmwSeq {
            operand_low,
            operand_high,
            temp_low,
            temp_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_late_use(operand_low);
            collector.reg_late_use(operand_high);
            collector.reg_fixed_def(temp_low, regs::rbx());
            collector.reg_fixed_def(temp_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }
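        // Editorial note: both 128-bit atomic sequences (above and below) are
        // lowered around `cmpxchg16b`, which implicitly compares RDX:RAX
        // against memory and, on a successful compare, stores RCX:RBX; that is
        // why these operands are pinned to rax/rdx and rbx/rcx.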
        Inst::Atomic128XchgSeq {
            operand_low,
            operand_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_fixed_late_use(operand_low, regs::rbx());
            collector.reg_fixed_late_use(operand_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }

        Inst::Rets { rets } => {
            // The return value(s) are live-out; we represent this
            // with register uses on the return instruction.
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpKnown { .. }
        | Inst::WinchJmpIf { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondOr { .. }
        | Inst::TrapIf { .. }
        | Inst::TrapIfAnd { .. }
        | Inst::TrapIfOr { .. } => {
            // No registers are used.
        }

        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
            collector.reg_fixed_def(dst, regs::rax());
            // All caller-saves are clobbered.
            //
            // We use the SysV calling convention here because the
            // pseudoinstruction (and relocation that it emits) is specific to
            // ELF systems; other x86-64 targets with other conventions (i.e.,
            // Windows) use different TLS strategies.
            let mut clobbers =
                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
            clobbers.remove(regs::gpr_preg(asm::gpr::enc::RAX));
            collector.reg_clobbers(clobbers);
        }

        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
            // We also use the gs register. But that register is not allocatable by the
            // register allocator, so we don't need to mark it as used here.

            // We use %rax to set the address
            collector.reg_fixed_def(dst, regs::rax());

            // We use %rcx as a temporary variable to load the _tls_index
            collector.reg_fixed_def(tmp, regs::rcx());
        }

        Inst::Unwind { .. } => {}

        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }

        Inst::LabelAddress { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::External { inst } => {
            inst.visit(&mut external::RegallocVisitor { collector });
        }
    }
}
//=============================================================================
// Instructions: misc functions and external interface

impl MachInst for Inst {
    type ABIMachineSpec = X64ABIMachineSpec;

    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        x64_get_operands(self, collector)
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        use asm::inst::Inst as I;
        match self {
            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
            // out the upper 32 bits of the destination. For example, we could
            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
            // %reg.
            Self::External {
                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
                asm::GprMem::Mem(_) => None,
            },
            Self::External {
                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
                asm::GprMem::Mem(_) => None,
            },

            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
            // here because they only overwrite the low bits in the destination
            // register, otherwise preserving the upper bits. That can be used
            // for lane-insertion instructions, for example, meaning it's not
            // classified as a register move.
            //
            // Otherwise though all register-to-register movement instructions
            // which move 128-bits are registered as moves.
            Self::External {
                inst:
                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            // In addition to the "A" format of instructions above, also
            // recognize the "B" format: while it can be used for stores, it
            // can also be used for register moves.
            Self::External {
                inst:
                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            _ => None,
        }
    }
    fn is_included_in_clobbers(&self) -> bool {
        match self {
            &Inst::Args { .. } => false,
            _ => true,
        }
    }

    fn is_trap(&self) -> bool {
        match self {
            Self::External {
                inst: asm::inst::Inst::ud2_zo(..),
            } => true,
            _ => false,
        }
    }

    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    fn call_type(&self) -> CallType {
        match self {
            Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,

            Inst::ReturnCallKnown { .. } | Inst::ReturnCallUnknown { .. } => CallType::TailCall,

            _ => CallType::None,
        }
    }

    fn is_term(&self) -> MachTerminator {
        match self {
            // Interesting cases.
            &Self::Rets { .. } => MachTerminator::Ret,
            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
                MachTerminator::RetCall
            }
            &Self::JmpKnown { .. } => MachTerminator::Branch,
            &Self::JmpCond { .. } => MachTerminator::Branch,
            &Self::JmpCondOr { .. } => MachTerminator::Branch,
            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
                MachTerminator::Branch
            }
            // All other cases are boring.
            _ => MachTerminator::None,
        }
    }
    fn is_low_level_branch(&self) -> bool {
        match self {
            &Self::WinchJmpIf { .. } => true,
            _ => false,
        }
    }

    fn is_mem_access(&self) -> bool {
        panic!("TODO FILL ME OUT")
    }

    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
        trace!(
            "Inst::gen_move {:?} -> {:?} (type: {:?})",
            src_reg,
            dst_reg.to_reg(),
            ty
        );
        let rc_dst = dst_reg.to_reg().class();
        let rc_src = src_reg.class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        let inst = match rc_dst {
            RegClass::Int => {
                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
                    .into()
            }
            RegClass::Float => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
                // those, which may write more lanes than we need, but is specified to have
                // zero latency.
                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
                let src_reg = Xmm::new(src_reg).unwrap();
                match ty {
                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
                    }
                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
                    }
                    _ => unimplemented!("unable to move type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }

    fn gen_nop(preferred_size: usize) -> Inst {
        Inst::nop(std::cmp::min(preferred_size, 9) as u8)
    }
    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }
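    // Illustration (editorial note, not from upstream): the vector arm indexes
    // the canonical-type table by `ty.bytes().ilog2() - 1`, so a 4-byte vector
    // type maps to index 1 (`I8X4`) and a 16-byte vector maps to index 3
    // (`I8X16`).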
    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Change the alignment from 16-bytes to 32-bytes for better performance.
            // fix-8573: https://github.com/bytecodealliance/wasmtime/issues/8573
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}
/// Constant state used during emission of a sequence of instructions.
pub struct EmitInfo {
    pub(super) flags: settings::Flags,
    isa_flags: x64_settings::Flags,
}

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}

impl asm::AvailableFeatures for &EmitInfo {
    fn _64b(&self) -> bool {
        // Currently, this x64 backend always assumes 64-bit mode.
        true
    }

    fn compat(&self) -> bool {
        // For 32-bit compatibility mode, see
        // https://github.com/bytecodealliance/wasmtime/issues/1980 (TODO).
        false
    }

    fn sse(&self) -> bool {
        // Currently, this x64 backend always assumes SSE.
        true
    }

    fn sse2(&self) -> bool {
        // Currently, this x64 backend always assumes SSE2.
        true
    }

    fn sse3(&self) -> bool {
        self.isa_flags.has_sse3()
    }

    fn ssse3(&self) -> bool {
        self.isa_flags.has_ssse3()
    }

    fn sse41(&self) -> bool {
        self.isa_flags.has_sse41()
    }

    fn sse42(&self) -> bool {
        self.isa_flags.has_sse42()
    }

    fn bmi1(&self) -> bool {
        self.isa_flags.has_bmi1()
    }

    fn bmi2(&self) -> bool {
        self.isa_flags.has_bmi2()
    }

    fn lzcnt(&self) -> bool {
        self.isa_flags.has_lzcnt()
    }

    fn popcnt(&self) -> bool {
        self.isa_flags.has_popcnt()
    }

    fn avx(&self) -> bool {
        self.isa_flags.has_avx()
    }

    fn avx2(&self) -> bool {
        self.isa_flags.has_avx2()
    }

    fn avx512f(&self) -> bool {
        self.isa_flags.has_avx512f()
    }

    fn avx512vl(&self) -> bool {
        self.isa_flags.has_avx512vl()
    }

    fn cmpxchg16b(&self) -> bool {
        self.isa_flags.has_cmpxchg16b()
    }

    fn fma(&self) -> bool {
        self.isa_flags.has_fma()
    }

    fn avx512dq(&self) -> bool {
        self.isa_flags.has_avx512dq()
    }

    fn avx512bitalg(&self) -> bool {
        self.isa_flags.has_avx512bitalg()
    }

    fn avx512vbmi(&self) -> bool {
        self.isa_flags.has_avx512vbmi()
    }
}
impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}

/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location. Used for control flow instructions which consider an offset from the start of the
    /// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
    JmpRel32,

    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location.
    PCRel32,
}
impl MachInstLabelUse for LabelUse {
    const ALIGN: CodeOffset = 1;

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        match self {
            LabelUse::JmpRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
        }
    }
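    // Illustration (worked example, not from upstream, assuming the in-place
    // addend is zero): for a `JmpRel32` use at offset 0x10 whose label
    // resolves to offset 0x40, `pc_rel` is 0x30 and the patched field is
    // 0x30 - 4 = 0x2c, i.e. the displacement measured from the end of the
    // 4-byte field.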
    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        0
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("Veneer not supported for JumpRel32 label-use.");
            }
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
        match (reloc, addend) {
            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
            _ => None,
        }
    }
}