GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/winch/codegen/src/isa/x64/asm.rs
//! Assembler library implementation for x64.

use crate::{
    constant_pool::ConstantPool,
    isa::{CallingConvention, reg::Reg},
    masm::{
        DivKind, Extend, ExtendKind, ExtendType, IntCmpKind, MulWideKind, OperandSize, RemKind,
        RoundingMode, ShiftKind, Signed, V128ExtendKind, V128LoadExtendKind, Zero,
    },
    reg::writable,
};
use cranelift_codegen::{
    CallInfo, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState,
    MachLabel, PatchRegion, Writable,
    ir::{ExternalName, MemFlags, SourceLoc, TrapCode, Type, UserExternalNameRef, types},
    isa::{
        unwind::UnwindInst,
        x64::{
            AtomicRmwSeqOp, EmitInfo, EmitState, Inst,
            args::{
                self, Amode, CC, ExtMode, FromWritableReg, Gpr, GprMem, GprMemImm, RegMem,
                RegMemImm, SyntheticAmode, WritableGpr, WritableXmm, Xmm, XmmMem, XmmMemImm,
            },
            external::{PairedGpr, PairedXmm},
            settings as x64_settings,
        },
    },
    settings,
};

use crate::reg::WritableReg;
use cranelift_assembler_x64 as asm;

use super::address::Address;
use smallvec::SmallVec;

// Conversions between winch-codegen x64 types and cranelift-codegen x64 types.

impl From<Reg> for RegMemImm {
    fn from(reg: Reg) -> Self {
        RegMemImm::reg(reg.into())
    }
}

impl From<Reg> for RegMem {
    fn from(value: Reg) -> Self {
        RegMem::Reg { reg: value.into() }
    }
}

impl From<Reg> for WritableGpr {
    fn from(reg: Reg) -> Self {
        let writable = Writable::from_reg(reg.into());
        WritableGpr::from_writable_reg(writable).expect("valid writable gpr")
    }
}

impl From<Reg> for WritableXmm {
    fn from(reg: Reg) -> Self {
        let writable = Writable::from_reg(reg.into());
        WritableXmm::from_writable_reg(writable).expect("valid writable xmm")
    }
}

/// Convert a writable GPR register to the read-write pair expected by
/// `cranelift-codegen`.
fn pair_gpr(reg: WritableReg) -> PairedGpr {
    assert!(reg.to_reg().is_int());
    let read = Gpr::unwrap_new(reg.to_reg().into());
    let write = WritableGpr::from_reg(reg.to_reg().into());
    PairedGpr { read, write }
}
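
// Note: most x64 ALU instructions read and write the same operand, so a
// single winch `WritableReg` is split into the read/write pair that the
// external assembler expects. A minimal sketch of how the methods below use
// this (illustrative only; `rax` stands in for whatever register was
// allocated):
//
//     let dst = pair_gpr(writable!(rax));            // read = write = rax
//     let inst = asm::inst::addq_rm::new(dst, src);  // dst += src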

impl From<Reg> for asm::Gpr<Gpr> {
    fn from(reg: Reg) -> Self {
        asm::Gpr::new(reg.into())
    }
}

impl From<Reg> for asm::GprMem<Gpr, Gpr> {
    fn from(reg: Reg) -> Self {
        asm::GprMem::Gpr(reg.into())
    }
}

/// Convert a writable XMM register to the read-write pair expected by
/// `cranelift-codegen`.
fn pair_xmm(reg: WritableReg) -> PairedXmm {
    assert!(reg.to_reg().is_float());
    let read = Xmm::unwrap_new(reg.to_reg().into());
    let write = WritableXmm::from_reg(reg.to_reg().into());
    PairedXmm { read, write }
}

impl From<Reg> for asm::Xmm<Xmm> {
    fn from(reg: Reg) -> Self {
        asm::Xmm::new(reg.into())
    }
}

impl From<Reg> for asm::XmmMem<Xmm, Gpr> {
    fn from(reg: Reg) -> Self {
        asm::XmmMem::Xmm(reg.into())
    }
}

impl From<Reg> for Gpr {
    fn from(reg: Reg) -> Self {
        Gpr::unwrap_new(reg.into())
    }
}

impl From<Reg> for GprMem {
    fn from(value: Reg) -> Self {
        GprMem::unwrap_new(value.into())
    }
}

impl From<Reg> for GprMemImm {
    fn from(reg: Reg) -> Self {
        GprMemImm::unwrap_new(reg.into())
    }
}

impl From<Reg> for Xmm {
    fn from(reg: Reg) -> Self {
        Xmm::unwrap_new(reg.into())
    }
}

impl From<Reg> for XmmMem {
    fn from(value: Reg) -> Self {
        XmmMem::unwrap_new(value.into())
    }
}

impl From<Reg> for XmmMemImm {
    fn from(value: Reg) -> Self {
        XmmMemImm::unwrap_new(value.into())
    }
}

impl From<OperandSize> for args::OperandSize {
    fn from(size: OperandSize) -> Self {
        match size {
            OperandSize::S8 => Self::Size8,
            OperandSize::S16 => Self::Size16,
            OperandSize::S32 => Self::Size32,
            OperandSize::S64 => Self::Size64,
            s => panic!("Invalid operand size {s:?}"),
        }
    }
}

impl From<IntCmpKind> for CC {
    fn from(value: IntCmpKind) -> Self {
        match value {
            IntCmpKind::Eq => CC::Z,
            IntCmpKind::Ne => CC::NZ,
            IntCmpKind::LtS => CC::L,
            IntCmpKind::LtU => CC::B,
            IntCmpKind::GtS => CC::NLE,
            IntCmpKind::GtU => CC::NBE,
            IntCmpKind::LeS => CC::LE,
            IntCmpKind::LeU => CC::BE,
            IntCmpKind::GeS => CC::NL,
            IntCmpKind::GeU => CC::NB,
        }
    }
}

impl<T: ExtendType> From<Extend<T>> for ExtMode {
    fn from(value: Extend<T>) -> Self {
        match value {
            Extend::I32Extend8 => ExtMode::BL,
            Extend::I32Extend16 => ExtMode::WL,
            Extend::I64Extend8 => ExtMode::BQ,
            Extend::I64Extend16 => ExtMode::WQ,
            Extend::I64Extend32 => ExtMode::LQ,
            Extend::__Kind(_) => unreachable!(),
        }
    }
}

impl From<ExtendKind> for ExtMode {
    fn from(value: ExtendKind) -> Self {
        match value {
            ExtendKind::Signed(s) => s.into(),
            ExtendKind::Unsigned(u) => u.into(),
        }
    }
}

/// Kinds of extends supported by `vpmov`.
pub(super) enum VpmovKind {
    /// Sign extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8S,
    /// Zero extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8U,
    /// Sign extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4S,
    /// Zero extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4U,
    /// Sign extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2S,
    /// Zero extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2U,
}

impl From<V128LoadExtendKind> for VpmovKind {
    fn from(value: V128LoadExtendKind) -> Self {
        match value {
            V128LoadExtendKind::E8x8S => Self::E8x8S,
            V128LoadExtendKind::E8x8U => Self::E8x8U,
            V128LoadExtendKind::E16x4S => Self::E16x4S,
            V128LoadExtendKind::E16x4U => Self::E16x4U,
            V128LoadExtendKind::E32x2S => Self::E32x2S,
            V128LoadExtendKind::E32x2U => Self::E32x2U,
        }
    }
}

impl From<V128ExtendKind> for VpmovKind {
    fn from(value: V128ExtendKind) -> Self {
        match value {
            V128ExtendKind::LowI8x16S | V128ExtendKind::HighI8x16S => Self::E8x8S,
            V128ExtendKind::LowI8x16U => Self::E8x8U,
            V128ExtendKind::LowI16x8S | V128ExtendKind::HighI16x8S => Self::E16x4S,
            V128ExtendKind::LowI16x8U => Self::E16x4U,
            V128ExtendKind::LowI32x4S | V128ExtendKind::HighI32x4S => Self::E32x2S,
            V128ExtendKind::LowI32x4U => Self::E32x2U,
            _ => unimplemented!(),
        }
    }
}

/// Kinds of comparisons supported by `vcmp`.
pub(super) enum VcmpKind {
    /// Equal comparison.
    Eq,
    /// Not equal comparison.
    Ne,
    /// Less than comparison.
    Lt,
    /// Less than or equal comparison.
    Le,
    /// Unordered comparison. Sets result to all 1s if either source operand is
    /// NaN.
    Unord,
}

/// Kinds of conversions supported by `vcvt`.
pub(super) enum VcvtKind {
    /// Converts 32-bit integers to 32-bit floats.
    I32ToF32,
    /// Converts 32-bit integers to double precision floats.
    I32ToF64,
    /// Converts double precision floats to single precision floats.
    F64ToF32,
    /// Converts double precision floats to 32-bit integers.
    F64ToI32,
    /// Converts single precision floats to double precision floats.
    F32ToF64,
    /// Converts single precision floats to 32-bit integers.
    F32ToI32,
}

/// Modes supported by `vround`.
pub(crate) enum VroundMode {
    /// Rounds toward nearest (ties to even).
    TowardNearest,
    /// Rounds toward negative infinity.
    TowardNegativeInfinity,
    /// Rounds toward positive infinity.
    TowardPositiveInfinity,
    /// Rounds toward zero.
    TowardZero,
}

/// Low level assembler implementation for x64.
pub(crate) struct Assembler {
    /// The machine instruction buffer.
    buffer: MachBuffer<Inst>,
    /// Constant emission information.
    emit_info: EmitInfo,
    /// Emission state.
    emit_state: EmitState,
    /// x64 flags.
    isa_flags: x64_settings::Flags,
    /// Constant pool.
    pool: ConstantPool,
}

impl Assembler {
    /// Create a new x64 assembler.
    pub fn new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self {
            buffer: MachBuffer::<Inst>::new(),
            emit_state: Default::default(),
            emit_info: EmitInfo::new(shared_flags, isa_flags.clone()),
            pool: ConstantPool::new(),
            isa_flags,
        }
    }
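
    // A rough usage sketch of this type (illustrative only, not from this
    // file): the masm layer constructs one assembler per function, emits
    // instructions into it, and finalizes the buffer once codegen is done.
    //
    //     let mut asm = Assembler::new(shared_flags, isa_flags);
    //     asm.push_r(rbp);                 // prologue, etc.
    //     asm.ret();
    //     let code = asm.finalize(None);   // MachBufferFinalized<Final>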

    /// Get a mutable reference to the underlying machine buffer.
    pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
        &mut self.buffer
    }

    /// Get a reference to the underlying machine buffer.
    pub fn buffer(&self) -> &MachBuffer<Inst> {
        &self.buffer
    }

    /// Adds a constant to the constant pool and returns its address.
    pub fn add_constant(&mut self, constant: &[u8]) -> Address {
        let handle = self.pool.register(constant, &mut self.buffer);
        Address::constant(handle)
    }

    /// Load a floating point constant, using the constant pool.
    pub fn load_fp_const(&mut self, dst: WritableReg, constant: &[u8], size: OperandSize) {
        let addr = self.add_constant(constant);
        self.xmm_mov_mr(&addr, dst, size, MemFlags::trusted());
    }

    /// Return the emitted code.
    pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
        let stencil = self
            .buffer
            .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
        stencil.apply_base_srcloc(loc.unwrap_or_default())
    }

    fn emit(&mut self, inst: Inst) {
        inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
    }

    fn to_synthetic_amode(addr: &Address, memflags: MemFlags) -> SyntheticAmode {
        match *addr {
            Address::Offset { base, offset } => {
                let amode = Amode::imm_reg(offset as i32, base.into()).with_flags(memflags);
                SyntheticAmode::real(amode)
            }
            Address::Const(c) => SyntheticAmode::ConstantOffset(c),
            Address::ImmRegRegShift {
                simm32,
                base,
                index,
                shift,
            } => SyntheticAmode::Real(Amode::ImmRegRegShift {
                simm32,
                base: base.into(),
                index: index.into(),
                shift,
                flags: memflags,
            }),
        }
    }
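
    // The three `Address` cases above map onto x64 addressing modes roughly
    // as follows (sketch, AT&T-style syntax):
    //
    //     Address::Offset { base, offset }  =>  offset(%base)
    //     Address::Const(c)                 =>  RIP-relative constant-pool ref
    //     Address::ImmRegRegShift { .. }    =>  simm32(%base, %index, 1 << shift)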

    /// Emit an unwind instruction.
    pub fn unwind_inst(&mut self, inst: UnwindInst) {
        self.emit(Inst::Unwind { inst })
    }

    /// Push register.
    pub fn push_r(&mut self, reg: Reg) {
        let inst = asm::inst::pushq_o::new(reg).into();
        self.emit(Inst::External { inst });
    }

    /// Pop to register.
    pub fn pop_r(&mut self, dst: WritableReg) {
        let writable: WritableGpr = dst.map(Into::into);
        let inst = asm::inst::popq_o::new(writable).into();
        self.emit(Inst::External { inst });
    }

    /// Return instruction.
    pub fn ret(&mut self) {
        let inst = asm::inst::retq_zo::new().into();
        self.emit(Inst::External { inst });
    }

    /// Register-to-register move.
    pub fn mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
            OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
            OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
            OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Register-to-memory move.
    pub fn mov_rm(&mut self, src: Reg, addr: &Address, size: OperandSize, flags: MemFlags) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(addr, flags);
        let inst = match size {
            OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
            OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
            OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
            OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Immediate-to-memory move.
    pub fn mov_im(&mut self, src: i32, addr: &Address, size: OperandSize, flags: MemFlags) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(addr, flags);
        let inst = match size {
            OperandSize::S8 => {
                let src = i8::try_from(src).unwrap();
                asm::inst::movb_mi::new(dst, src.cast_unsigned()).into()
            }
            OperandSize::S16 => {
                let src = i16::try_from(src).unwrap();
                asm::inst::movw_mi::new(dst, src.cast_unsigned()).into()
            }
            OperandSize::S32 => asm::inst::movl_mi::new(dst, src.cast_unsigned()).into(),
            OperandSize::S64 => asm::inst::movq_mi_sxl::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Immediate-to-register move.
    pub fn mov_ir(&mut self, imm: u64, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::imm(size.into(), imm, dst.map(Into::into)));
    }

    /// Zero-extend memory-to-register load.
    pub fn movzx_mr(
        &mut self,
        addr: &Address,
        dst: WritableReg,
        ext: Option<Extend<Zero>>,
        memflags: MemFlags,
    ) {
        let src = Self::to_synthetic_amode(addr, memflags);

        if let Some(ext) = ext {
            let dst = WritableGpr::from_reg(dst.to_reg().into());
            let inst = match ext.into() {
                ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
                ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
                ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
                ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
                ExtMode::LQ => {
                    // This instruction selection may seem strange but is
                    // correct in 64-bit mode: section 3.4.1.1 of the Intel
                    // manual says that "32-bit operands generate a 32-bit
                    // result, zero-extended to a 64-bit result in the
                    // destination general-purpose register." This is applicable
                    // beyond `mov` but we use this fact to zero-extend `src`
                    // into `dst`.
                    asm::inst::movl_rm::new(dst, src).into()
                }
            };
            self.emit(Inst::External { inst });
        } else {
            let dst = WritableGpr::from_reg(dst.to_reg().into());
            let inst = asm::inst::movq_rm::new(dst, src).into();
            self.emit(Inst::External { inst });
        }
    }

    /// Sign-extend memory-to-register load.
    pub fn movsx_mr(
        &mut self,
        addr: &Address,
        dst: WritableReg,
        ext: Extend<Signed>,
        memflags: MemFlags,
    ) {
        let src = Self::to_synthetic_amode(addr, memflags);
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match ext.into() {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }

    /// Register-to-register move with zero extension.
    pub fn movzx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Zero>) {
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match kind.into() {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        self.emit(Inst::External { inst });
    }

    /// Register-to-register move with sign extension.
    pub fn movsx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Signed>) {
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match kind.into() {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }

    /// Integer register conditional move.
    pub fn cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
        use IntCmpKind::*;
        use OperandSize::*;

        let dst: WritableGpr = dst.map(Into::into);
        let inst = match size {
            S8 | S16 | S32 => match cc {
                Eq => asm::inst::cmovel_rm::new(dst, src).into(),
                Ne => asm::inst::cmovnel_rm::new(dst, src).into(),
                LtS => asm::inst::cmovll_rm::new(dst, src).into(),
                LtU => asm::inst::cmovbl_rm::new(dst, src).into(),
                GtS => asm::inst::cmovgl_rm::new(dst, src).into(),
                GtU => asm::inst::cmoval_rm::new(dst, src).into(),
                LeS => asm::inst::cmovlel_rm::new(dst, src).into(),
                LeU => asm::inst::cmovbel_rm::new(dst, src).into(),
                GeS => asm::inst::cmovgel_rm::new(dst, src).into(),
                GeU => asm::inst::cmovael_rm::new(dst, src).into(),
            },
            S64 => match cc {
                Eq => asm::inst::cmoveq_rm::new(dst, src).into(),
                Ne => asm::inst::cmovneq_rm::new(dst, src).into(),
                LtS => asm::inst::cmovlq_rm::new(dst, src).into(),
                LtU => asm::inst::cmovbq_rm::new(dst, src).into(),
                GtS => asm::inst::cmovgq_rm::new(dst, src).into(),
                GtU => asm::inst::cmovaq_rm::new(dst, src).into(),
                LeS => asm::inst::cmovleq_rm::new(dst, src).into(),
                LeU => asm::inst::cmovbeq_rm::new(dst, src).into(),
                GeS => asm::inst::cmovgeq_rm::new(dst, src).into(),
                GeU => asm::inst::cmovaeq_rm::new(dst, src).into(),
            },
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Single and double precision floating point
    /// register-to-register move.
    pub fn xmm_mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let ty = match size {
            OperandSize::S32 => types::F32,
            OperandSize::S64 => types::F64,
            OperandSize::S128 => types::I32X4,
            OperandSize::S8 | OperandSize::S16 => unreachable!(),
        };
        self.emit(Inst::gen_move(dst.map(|r| r.into()), src.into(), ty));
    }

    /// Single and double precision floating point load.
    pub fn xmm_mov_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        use OperandSize::*;

        assert!(dst.to_reg().is_float());

        let src = Self::to_synthetic_amode(src, flags);
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            S32 => asm::inst::movss_a_m::new(dst, src).into(),
            S64 => asm::inst::movsd_a_m::new(dst, src).into(),
            S128 => asm::inst::movdqu_a::new(dst, src).into(),
            S8 | S16 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Vector load and extend.
    pub fn xmm_vpmov_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        kind: VpmovKind,
        flags: MemFlags,
    ) {
        assert!(dst.to_reg().is_float());
        let src = Self::to_synthetic_amode(src, flags);
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match kind {
            VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
            VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
            VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
            VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
            VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
            VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }

    /// Extends vector of integers in `src` and puts results in `dst`.
    pub fn xmm_vpmov_rr(&mut self, src: Reg, dst: WritableReg, kind: VpmovKind) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match kind {
            VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
            VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
            VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
            VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
            VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
            VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }

    /// Vector load and broadcast.
    pub fn xmm_vpbroadcast_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(dst.to_reg().is_float());
        let src = Self::to_synthetic_amode(src, flags);
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
            OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
            OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Value in `src` is broadcast into lanes of `size` in `dst`.
    pub fn xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        assert!(src.is_float() && dst.to_reg().is_float());
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
            OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
            OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Memory to register shuffle of bytes in vector.
    pub fn xmm_vpshuf_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        mask: u8,
        size: OperandSize,
        flags: MemFlags,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let src = Self::to_synthetic_amode(src, flags);
        let inst = match size {
            OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Register to register shuffle of bytes in vector.
    pub fn xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());

        let inst = match size {
            OperandSize::S16 => asm::inst::vpshuflw_a::new(dst, src, mask).into(),
            OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Single and double precision floating point store.
    pub fn xmm_mov_rm(&mut self, src: Reg, dst: &Address, size: OperandSize, flags: MemFlags) {
        use OperandSize::*;

        assert!(src.is_float());

        let dst = Self::to_synthetic_amode(dst, flags);
        let src: Xmm = src.into();
        let inst = match size {
            S32 => asm::inst::movss_c_m::new(dst, src).into(),
            S64 => asm::inst::movsd_c_m::new(dst, src).into(),
            S128 => asm::inst::movdqu_b::new(dst, src).into(),
            S16 | S8 => unreachable!(),
        };
        self.emit(Inst::External { inst })
    }

    /// Floating point register conditional move.
    pub fn xmm_cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
        let dst: WritableXmm = dst.map(Into::into);
        let ty = match size {
            OperandSize::S32 => types::F32,
            OperandSize::S64 => types::F64,
            // Move the entire 128 bits via movdqa.
            OperandSize::S128 => types::I32X4,
            OperandSize::S8 | OperandSize::S16 => unreachable!(),
        };

        self.emit(Inst::XmmCmove {
            ty,
            cc: cc.into(),
            consequent: Xmm::unwrap_new(src.into()),
            alternative: dst.to_reg(),
            dst,
        })
    }

    /// Subtract register and register.
    pub fn sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::subb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::subw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::subl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::subq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtract immediate from register.
    pub fn sub_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::subb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
            OperandSize::S16 => asm::inst::subw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
            OperandSize::S32 => asm::inst::subl_mi::new(dst, imm as u32).into(),
            OperandSize::S64 => asm::inst::subq_mi_sxl::new(dst, imm).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// "and" two registers.
    pub fn and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::andb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::andw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::andl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::andq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// "and" immediate and register.
    pub fn and_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::andb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
            OperandSize::S16 => asm::inst::andw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
            OperandSize::S32 => asm::inst::andl_mi::new(dst, imm as u32).into(),
            OperandSize::S64 => asm::inst::andq_mi_sxl::new(dst, imm).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// "and" two float registers.
    pub fn xmm_and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_xmm(dst);
        let inst = match size {
            OperandSize::S32 => asm::inst::andps_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::andpd_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// "and not" two float registers.
    pub fn xmm_andn_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_xmm(dst);
        let inst = match size {
            OperandSize::S32 => asm::inst::andnps_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::andnpd_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Move from a general purpose register to an XMM register.
    pub fn gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::movd_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::movq_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Move from an XMM register to a general purpose register.
    pub fn xmm_to_gpr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableGpr = dst.map(Into::into);
        let src: Xmm = src.into();
        let inst = match size {
            OperandSize::S32 => asm::inst::movd_b::new(dst, src).into(),
            OperandSize::S64 => asm::inst::movq_b::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::External { inst })
    }

    /// Convert float to signed int.
    pub fn cvt_float_to_sint_seq(
        &mut self,
        src: Reg,
        dst: WritableReg,
        tmp_gpr: Reg,
        tmp_xmm: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
        saturating: bool,
    ) {
        self.emit(Inst::CvtFloatToSintSeq {
            dst_size: dst_size.into(),
            src_size: src_size.into(),
            is_saturating: saturating,
            src: src.into(),
            dst: dst.map(Into::into),
            tmp_gpr: tmp_gpr.into(),
            tmp_xmm: tmp_xmm.into(),
        });
    }

    /// Convert float to unsigned int.
    pub fn cvt_float_to_uint_seq(
        &mut self,
        src: Reg,
        dst: WritableReg,
        tmp_gpr: Reg,
        tmp_xmm: Reg,
        tmp_xmm2: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
        saturating: bool,
    ) {
        self.emit(Inst::CvtFloatToUintSeq {
            dst_size: dst_size.into(),
            src_size: src_size.into(),
            is_saturating: saturating,
            src: src.into(),
            dst: dst.map(Into::into),
            tmp_gpr: tmp_gpr.into(),
            tmp_xmm: tmp_xmm.into(),
            tmp_xmm2: tmp_xmm2.into(),
        });
    }

    /// Convert signed int to float.
    pub fn cvt_sint_to_float(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        use OperandSize::*;
        let dst = pair_xmm(dst);
        let inst = match (src_size, dst_size) {
            (S32, S32) => asm::inst::cvtsi2ssl_a::new(dst, src).into(),
            (S32, S64) => asm::inst::cvtsi2sdl_a::new(dst, src).into(),
            (S64, S32) => asm::inst::cvtsi2ssq_a::new(dst, src).into(),
            (S64, S64) => asm::inst::cvtsi2sdq_a::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Convert unsigned 64-bit int to float.
    pub fn cvt_uint64_to_float_seq(
        &mut self,
        src: Reg,
        dst: WritableReg,
        tmp_gpr1: Reg,
        tmp_gpr2: Reg,
        dst_size: OperandSize,
    ) {
        self.emit(Inst::CvtUint64ToFloatSeq {
            dst_size: dst_size.into(),
            src: src.into(),
            dst: dst.map(Into::into),
            tmp_gpr1: tmp_gpr1.into(),
            tmp_gpr2: tmp_gpr2.into(),
        });
    }

    /// Change precision of float.
    pub fn cvt_float_to_float(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        use OperandSize::*;
        let dst = pair_xmm(dst);
        let inst = match (src_size, dst_size) {
            (S32, S64) => asm::inst::cvtss2sd_a::new(dst, src).into(),
            (S64, S32) => asm::inst::cvtsd2ss_a::new(dst, src).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// "or" two registers.
    pub fn or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::orb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::orw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::orl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::orq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// "or" immediate and register.
    pub fn or_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::orb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
            OperandSize::S16 => asm::inst::orw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
            OperandSize::S32 => asm::inst::orl_mi::new(dst, imm as u32).into(),
            OperandSize::S64 => asm::inst::orq_mi_sxl::new(dst, imm).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// "or" two float registers.
    pub fn xmm_or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_xmm(dst);
        let inst = match size {
            OperandSize::S32 => asm::inst::orps_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::orpd_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Logical exclusive or with registers.
    pub fn xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::xorb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::xorw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::xorl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::xorq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Logical exclusive or with immediate and register.
    pub fn xor_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::xorb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
            OperandSize::S16 => asm::inst::xorw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
            OperandSize::S32 => asm::inst::xorl_mi::new(dst, imm as u32).into(),
            OperandSize::S64 => asm::inst::xorq_mi_sxl::new(dst, imm).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Logical exclusive or with float registers.
    pub fn xmm_xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_xmm(dst);
        let inst = match size {
            OperandSize::S32 => asm::inst::xorps_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::xorpd_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shift with register and register.
    pub fn shift_rr(&mut self, src: Reg, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
        let dst = pair_gpr(dst);
        let src: Gpr = src.into();
        let inst = match (kind, size) {
            (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mc::new(dst, src).into(),
            (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mc::new(dst, src).into(),
            (ShiftKind::Shl, _) => todo!(),
            (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mc::new(dst, src).into(),
            (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mc::new(dst, src).into(),
            (ShiftKind::ShrS, _) => todo!(),
            (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mc::new(dst, src).into(),
            (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mc::new(dst, src).into(),
            (ShiftKind::ShrU, _) => todo!(),
            (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mc::new(dst, src).into(),
            (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mc::new(dst, src).into(),
            (ShiftKind::Rotl, _) => todo!(),
            (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mc::new(dst, src).into(),
            (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mc::new(dst, src).into(),
            (ShiftKind::Rotr, _) => todo!(),
        };
        self.emit(Inst::External { inst });
    }
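
    // Note on the `_mc` forms above: x64 variable shifts and rotates take
    // their count implicitly in `%cl`, so `src` is expected to have been
    // allocated to `rcx` by the caller (a sketch of the emitted form:
    // `shl %cl, %reg`).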

    /// Shift with immediate and register.
    pub fn shift_ir(&mut self, imm: u8, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match (kind, size) {
            (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mi::new(dst, imm).into(),
            (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mi::new(dst, imm).into(),
            (ShiftKind::Shl, _) => todo!(),
            (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mi::new(dst, imm).into(),
            (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mi::new(dst, imm).into(),
            (ShiftKind::ShrS, _) => todo!(),
            (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mi::new(dst, imm).into(),
            (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mi::new(dst, imm).into(),
            (ShiftKind::ShrU, _) => todo!(),
            (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mi::new(dst, imm).into(),
            (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mi::new(dst, imm).into(),
            (ShiftKind::Rotl, _) => todo!(),
            (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mi::new(dst, imm).into(),
            (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mi::new(dst, imm).into(),
            (ShiftKind::Rotr, _) => todo!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Signed/unsigned division.
    ///
    /// Emits a sequence of instructions to ensure the correctness of
    /// the division invariants. This function assumes that the
    /// caller has correctly allocated the dividend as `(rdx:rax)` and
    /// accounted for the quotient to be stored in `rax`.
    pub fn div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize) {
        let trap = match kind {
            // Signed division has two trapping conditions, integer overflow and
            // divide-by-zero. Check for divide-by-zero explicitly and let the
            // hardware detect overflow.
            DivKind::Signed => {
                self.cmp_ir(divisor, 0, size);
                self.emit(Inst::TrapIf {
                    cc: CC::Z,
                    trap_code: TrapCode::INTEGER_DIVISION_BY_ZERO,
                });

                // Sign-extend the dividend with tailor-made instructions for
                // just this operation.
                let ext_dst: WritableGpr = dst.1.into();
                let ext_src: Gpr = dst.0.into();
                let inst = match size {
                    OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
                    OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
                    _ => unimplemented!(),
                };
                self.emit(Inst::External { inst });
                TrapCode::INTEGER_OVERFLOW
            }

            // Unsigned division only traps in one case, on divide-by-zero, so
            // defer that to the trap opcode.
            //
            // The dividend_hi reg is initialized with zero through an
            // xor-against-itself op.
            DivKind::Unsigned => {
                self.xor_rr(dst.1, writable!(dst.1), size);
                TrapCode::INTEGER_DIVISION_BY_ZERO
            }
        };
        let dst0 = pair_gpr(writable!(dst.0));
        let dst1 = pair_gpr(writable!(dst.1));
        let inst = match (kind, size) {
            (DivKind::Signed, OperandSize::S32) => {
                asm::inst::idivl_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Unsigned, OperandSize::S32) => {
                asm::inst::divl_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Signed, OperandSize::S64) => {
                asm::inst::idivq_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Unsigned, OperandSize::S64) => {
                asm::inst::divq_m::new(dst0, dst1, divisor, trap).into()
            }
            _ => todo!(),
        };
        self.emit(Inst::External { inst });
    }
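
    // For reference, the signed 32-bit case above emits roughly this
    // sequence (sketch, AT&T syntax; divide-by-zero is checked explicitly,
    // while overflow is left to the hardware `idiv` fault):
    //
    //     cmp   $0, %divisor
    //     jz    -> trap(IntegerDivisionByZero)
    //     cltd               ; sign-extend %eax into %edx
    //     idiv  %divisor     ; quotient -> %eax, remainder -> %edx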

    /// Signed/unsigned remainder.
    ///
    /// Emits a sequence of instructions to ensure the correctness of the
    /// division invariants and ultimately calculate the remainder. This
    /// function assumes that the caller has correctly allocated the dividend
    /// as `(rdx:rax)` and accounted for the remainder to be stored in `rdx`.
    pub fn rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize) {
        match kind {
            // Signed remainder goes through a pseudo-instruction which has
            // some internal branching. The `dividend_hi`, or `rdx`, is
            // initialized here with a `SignExtendData` instruction.
            RemKind::Signed => {
                let ext_dst: WritableGpr = dst.1.into();

                // Initialize `dividend_hi`, or `rdx`, with a tailor-made
                // instruction for this operation.
                let ext_src: Gpr = dst.0.into();
                let inst = match size {
                    OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
                    OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
                    _ => unimplemented!(),
                };
                self.emit(Inst::External { inst });
                self.emit(Inst::CheckedSRemSeq {
                    size: size.into(),
                    divisor: divisor.into(),
                    dividend_lo: dst.0.into(),
                    dividend_hi: dst.1.into(),
                    dst_quotient: dst.0.into(),
                    dst_remainder: dst.1.into(),
                });
            }

            // Unsigned remainder initializes `dividend_hi` with zero and
            // then executes a normal `div` instruction.
            RemKind::Unsigned => {
                self.xor_rr(dst.1, writable!(dst.1), size);
                let dst0 = pair_gpr(writable!(dst.0));
                let dst1 = pair_gpr(writable!(dst.1));
                let trap = TrapCode::INTEGER_DIVISION_BY_ZERO;
                let inst = match size {
                    OperandSize::S32 => asm::inst::divl_m::new(dst0, dst1, divisor, trap).into(),
                    OperandSize::S64 => asm::inst::divq_m::new(dst0, dst1, divisor, trap).into(),
                    _ => todo!(),
                };
                self.emit(Inst::External { inst });
            }
        }
    }
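
    // Why the pseudo-instruction for the signed case: a plain `idiv` raises
    // a hardware fault on `INT_MIN % -1`, while Wasm defines that remainder
    // to be `0`. `CheckedSRemSeq` branches on a `-1` divisor and produces
    // the result directly in that case, bypassing `idiv`. A sketch of the
    // expansion (not the literal lowering):
    //
    //     cmp   $-1, %divisor
    //     jne   do_idiv
    //     xor   %rdx, %rdx      ; remainder = 0
    //     jmp   done
    //   do_idiv:
    //     idiv  %divisor
    //   done: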

    /// Multiply immediate and register.
    pub fn mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        use OperandSize::*;
        let src = dst.to_reg();
        let dst: WritableGpr = dst.to_reg().into();
        let inst = match size {
            S16 => asm::inst::imulw_rmi::new(dst, src, u16::try_from(imm).unwrap()).into(),
            S32 => asm::inst::imull_rmi::new(dst, src, imm as u32).into(),
            S64 => asm::inst::imulq_rmi_sxl::new(dst, src, imm).into(),
            S8 | S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Multiply register and register.
    pub fn mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        use OperandSize::*;
        let dst = pair_gpr(dst);
        let inst = match size {
            S16 => asm::inst::imulw_rm::new(dst, src).into(),
            S32 => asm::inst::imull_rm::new(dst, src).into(),
            S64 => asm::inst::imulq_rm::new(dst, src).into(),
            S8 | S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add immediate and register.
    pub fn add_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::addb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
            OperandSize::S16 => asm::inst::addw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
            OperandSize::S32 => asm::inst::addl_mi::new(dst, imm as u32).into(),
            OperandSize::S64 => asm::inst::addq_mi_sxl::new(dst, imm).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add register and register.
    pub fn add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::addb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::addw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::addl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::addq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Atomic exchange-and-add (`lock xadd`).
    pub fn lock_xadd(
        &mut self,
        addr: Address,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(addr.is_offset());
        let mem = Self::to_synthetic_amode(&addr, flags);
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::lock_xaddb_mr::new(mem, dst).into(),
            OperandSize::S16 => asm::inst::lock_xaddw_mr::new(mem, dst).into(),
            OperandSize::S32 => asm::inst::lock_xaddl_mr::new(mem, dst).into(),
            OperandSize::S64 => asm::inst::lock_xaddq_mr::new(mem, dst).into(),
            OperandSize::S128 => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Emit an atomic read-modify-write sequence for operations without a
    /// single-instruction encoding.
    pub fn atomic_rmw_seq(
        &mut self,
        addr: Address,
        operand: Reg,
        dst: WritableReg,
        temp: WritableReg,
        size: OperandSize,
        flags: MemFlags,
        op: AtomicRmwSeqOp,
    ) {
        assert!(addr.is_offset());
        let mem = Self::to_synthetic_amode(&addr, flags);
        self.emit(Inst::AtomicRmwSeq {
            ty: Type::int_with_byte_size(size.bytes() as _).unwrap(),
            mem,
            operand: operand.into(),
            temp: temp.map(Into::into),
            dst_old: dst.map(Into::into),
            op,
        });
    }
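
    // `Inst::AtomicRmwSeq` expands to a retry loop along these lines (a
    // sketch of the lowering, not a verbatim listing): load the old value
    // into `dst_old`, apply `op` into `temp`, then attempt a `lock cmpxchg`
    // back to memory and retry on failure:
    //
    //     mov   (%addr), %dst_old
    //   retry:
    //     mov   %dst_old, %temp
    //     <op>  %operand, %temp
    //     lock cmpxchg %temp, (%addr)
    //     jnz   retry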

    /// Atomically exchange the contents of `addr` with `dst`.
    pub fn xchg(&mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags) {
        assert!(addr.is_offset());
        let mem = Self::to_synthetic_amode(&addr, flags);
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::xchgb_rm::new(dst, mem).into(),
            OperandSize::S16 => asm::inst::xchgw_rm::new(dst, mem).into(),
            OperandSize::S32 => asm::inst::xchgl_rm::new(dst, mem).into(),
            OperandSize::S64 => asm::inst::xchgq_rm::new(dst, mem).into(),
            OperandSize::S128 => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Atomic compare-and-exchange (`lock cmpxchg`).
    pub fn cmpxchg(
        &mut self,
        addr: Address,
        replacement: Reg,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(addr.is_offset());
        let mem = Self::to_synthetic_amode(&addr, flags);
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::lock_cmpxchgb_mr::new(mem, replacement, dst).into(),
            OperandSize::S16 => asm::inst::lock_cmpxchgw_mr::new(mem, replacement, dst).into(),
            OperandSize::S32 => asm::inst::lock_cmpxchgl_mr::new(mem, replacement, dst).into(),
            OperandSize::S64 => asm::inst::lock_cmpxchgq_mr::new(mem, replacement, dst).into(),
            OperandSize::S128 => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Compare register and immediate, preferring the sign-extended 8-bit
    /// immediate encoding when the value fits.
    pub fn cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize) {
        let inst = match size {
            OperandSize::S8 => {
                let imm = i8::try_from(imm).unwrap();
                asm::inst::cmpb_mi::new(src1, imm.cast_unsigned()).into()
            }
            OperandSize::S16 => match i8::try_from(imm) {
                Ok(imm8) => asm::inst::cmpw_mi_sxb::new(src1, imm8).into(),
                Err(_) => {
                    asm::inst::cmpw_mi::new(src1, i16::try_from(imm).unwrap().cast_unsigned())
                        .into()
                }
            },
            OperandSize::S32 => match i8::try_from(imm) {
                Ok(imm8) => asm::inst::cmpl_mi_sxb::new(src1, imm8).into(),
                Err(_) => asm::inst::cmpl_mi::new(src1, imm.cast_unsigned()).into(),
            },
            OperandSize::S64 => match i8::try_from(imm) {
                Ok(imm8) => asm::inst::cmpq_mi_sxb::new(src1, imm8).into(),
                Err(_) => asm::inst::cmpq_mi::new(src1, imm).into(),
            },
            OperandSize::S128 => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Compare register and register.
    pub fn cmp_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
        let inst = match size {
            OperandSize::S8 => asm::inst::cmpb_rm::new(src1, src2).into(),
            OperandSize::S16 => asm::inst::cmpw_rm::new(src1, src2).into(),
            OperandSize::S32 => asm::inst::cmpl_rm::new(src1, src2).into(),
            OperandSize::S64 => asm::inst::cmpq_rm::new(src1, src2).into(),
            OperandSize::S128 => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Compares values in src1 and src2 and sets ZF, PF, and CF flags in the
    /// EFLAGS register.
    pub fn ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
        let inst = match size {
            OperandSize::S32 => asm::inst::ucomiss_a::new(src1, src2).into(),
            OperandSize::S64 => asm::inst::ucomisd_a::new(src1, src2).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Store the count of set bits in src in dst.
    /// Requires `has_popcnt` and `has_sse42` flags.
    pub fn popcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        assert!(
            self.isa_flags.has_popcnt() && self.isa_flags.has_sse42(),
            "Requires has_popcnt and has_sse42 flags"
        );
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match size {
            OperandSize::S16 => asm::inst::popcntw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::popcntl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::popcntq_rm::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Emit a test instruction with two register operands.
    pub fn test_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
        let inst = match size {
            OperandSize::S8 => asm::inst::testb_mr::new(src1, src2).into(),
            OperandSize::S16 => asm::inst::testw_mr::new(src1, src2).into(),
            OperandSize::S32 => asm::inst::testl_mr::new(src1, src2).into(),
            OperandSize::S64 => asm::inst::testq_mr::new(src1, src2).into(),
            OperandSize::S128 => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Set value in dst to `0` or `1` based on flags in status register and
    /// [`IntCmpKind`].
    pub fn setcc(&mut self, kind: IntCmpKind, dst: WritableReg) {
        self.setcc_impl(kind.into(), dst);
    }

    /// Set value in dst to `1` if parity flag in status register is set, `0`
    /// otherwise.
    pub fn setp(&mut self, dst: WritableReg) {
        self.setcc_impl(CC::P, dst);
    }

    /// Set value in dst to `1` if parity flag in status register is not set,
    /// `0` otherwise.
    pub fn setnp(&mut self, dst: WritableReg) {
        self.setcc_impl(CC::NP, dst);
    }

    fn setcc_impl(&mut self, cc: CC, dst: WritableReg) {
        // Clear the dst register first: `setcc` writes only the low byte, so
        // bits 8 through 63 could otherwise hold stale values. Don't use xor
        // for the clear since it updates the status register.
        let dst: WritableGpr = dst.map(Into::into);
        let inst = asm::inst::movl_oi::new(dst, 0).into();
        self.emit(Inst::External { inst });

        // Copy correct bit from status register into dst register.
        //
        // Note that some of these mnemonics don't match exactly and that's
        // intentional as there are multiple mnemonics for the same encoding in
        // some cases and the assembler picked ones that match Capstone rather
        // than Cranelift.
        let inst = match cc {
            CC::O => asm::inst::seto_m::new(dst).into(),
            CC::NO => asm::inst::setno_m::new(dst).into(),
            CC::B => asm::inst::setb_m::new(dst).into(),
            CC::NB => asm::inst::setae_m::new(dst).into(), // nb == ae
            CC::Z => asm::inst::sete_m::new(dst).into(),   // z == e
            CC::NZ => asm::inst::setne_m::new(dst).into(), // nz == ne
            CC::BE => asm::inst::setbe_m::new(dst).into(),
            CC::NBE => asm::inst::seta_m::new(dst).into(), // nbe == a
            CC::S => asm::inst::sets_m::new(dst).into(),
            CC::NS => asm::inst::setns_m::new(dst).into(),
            CC::L => asm::inst::setl_m::new(dst).into(),
            CC::NL => asm::inst::setge_m::new(dst).into(), // nl == ge
            CC::LE => asm::inst::setle_m::new(dst).into(),
            CC::NLE => asm::inst::setg_m::new(dst).into(), // nle == g
            CC::P => asm::inst::setp_m::new(dst).into(),
            CC::NP => asm::inst::setnp_m::new(dst).into(),
        };
        self.emit(Inst::External { inst });
    }
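
    // For example, `setcc(IntCmpKind::Eq, dst)` emits roughly the following
    // (sketch, AT&T syntax, with `dst` allocated to `%rax`):
    //
    //     movl  $0, %eax    ; clear, without touching EFLAGS
    //     sete  %al         ; %al = 1 if ZF is set, else 0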
1425
1426
/// Store the count of leading zeroes in src in dst.
1427
/// Requires `has_lzcnt` flag.
1428
pub fn lzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1429
assert!(self.isa_flags.has_lzcnt(), "Requires has_lzcnt flag");
1430
let dst = WritableGpr::from_reg(dst.to_reg().into());
1431
let inst = match size {
1432
OperandSize::S16 => asm::inst::lzcntw_rm::new(dst, src).into(),
1433
OperandSize::S32 => asm::inst::lzcntl_rm::new(dst, src).into(),
1434
OperandSize::S64 => asm::inst::lzcntq_rm::new(dst, src).into(),
1435
OperandSize::S8 | OperandSize::S128 => unreachable!(),
1436
};
1437
self.emit(Inst::External { inst });
1438
}
1439
1440
/// Store the count of trailing zeroes in src in dst.
1441
/// Requires `has_bmi1` flag.
1442
pub fn tzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1443
assert!(self.isa_flags.has_bmi1(), "Requires has_bmi1 flag");
1444
let dst = WritableGpr::from_reg(dst.to_reg().into());
1445
let inst = match size {
1446
OperandSize::S16 => asm::inst::tzcntw_a::new(dst, src).into(),
1447
OperandSize::S32 => asm::inst::tzcntl_a::new(dst, src).into(),
1448
OperandSize::S64 => asm::inst::tzcntq_a::new(dst, src).into(),
1449
OperandSize::S8 | OperandSize::S128 => unreachable!(),
1450
};
1451
self.emit(Inst::External { inst });
1452
}
1453
1454
/// Stores position of the most significant bit set in src in dst.
1455
/// Zero flag is set if src is equal to 0.
1456
pub fn bsr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1457
let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
1458
let inst = match size {
1459
OperandSize::S16 => asm::inst::bsrw_rm::new(dst, src).into(),
1460
OperandSize::S32 => asm::inst::bsrl_rm::new(dst, src).into(),
1461
OperandSize::S64 => asm::inst::bsrq_rm::new(dst, src).into(),
1462
OperandSize::S8 | OperandSize::S128 => unreachable!(),
1463
};
1464
self.emit(Inst::External { inst });
1465
}
1466
1467
/// Performs integer negation on `src` and places result in `dst`.
1468
pub fn neg(&mut self, read: Reg, write: WritableReg, size: OperandSize) {
1469
let gpr = PairedGpr {
1470
read: read.into(),
1471
write: WritableGpr::from_reg(write.to_reg().into()),
1472
};
1473
let inst = match size {
1474
OperandSize::S8 => asm::inst::negb_m::new(gpr).into(),
1475
            OperandSize::S16 => asm::inst::negw_m::new(gpr).into(),
            OperandSize::S32 => asm::inst::negl_m::new(gpr).into(),
            OperandSize::S64 => asm::inst::negq_m::new(gpr).into(),
            OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Stores the position of the least significant set bit of `src` in `dst`.
    /// The zero flag is set if `src` is equal to 0.
    pub fn bsf(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match size {
            OperandSize::S16 => asm::inst::bsfw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::bsfl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::bsfq_rm::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs float addition on `src` and `dst` and places the result in `dst`.
    pub fn xmm_add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_xmm(dst);
        let inst = match size {
            OperandSize::S32 => asm::inst::addss_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::addsd_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs float subtraction on `src` and `dst` and places the result in `dst`.
    pub fn xmm_sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_xmm(dst);
        let inst = match size {
            OperandSize::S32 => asm::inst::subss_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::subsd_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs float multiplication on `src` and `dst` and places the result in `dst`.
    pub fn xmm_mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        use OperandSize::*;
        let dst = pair_xmm(dst);
        let inst = match size {
            S32 => asm::inst::mulss_a::new(dst, src).into(),
            S64 => asm::inst::mulsd_a::new(dst, src).into(),
            S8 | S16 | S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs float division on `src` and `dst` and places the result in `dst`.
    pub fn xmm_div_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_xmm(dst);
        let inst = match size {
            OperandSize::S32 => asm::inst::divss_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::divsd_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Minimum of the `src` and `dst` XMM registers, with the result put in `dst`.
    pub fn xmm_min_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::XmmMinMaxSeq {
            size: size.into(),
            is_min: true,
            lhs: src.into(),
            rhs: dst.to_reg().into(),
            dst: dst.map(Into::into),
        });
    }

    /// Maximum of the `src` and `dst` XMM registers, with the result put in `dst`.
    pub fn xmm_max_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::XmmMinMaxSeq {
            size: size.into(),
            is_min: false,
            lhs: src.into(),
            rhs: dst.to_reg().into(),
            dst: dst.map(Into::into),
        });
    }

    /// Perform a rounding operation on the float register `src` and place the
    /// result in the float register `dst`.
    pub fn xmm_rounds_rr(
        &mut self,
        src: Reg,
        dst: WritableReg,
        mode: RoundingMode,
        size: OperandSize,
    ) {
        let dst = dst.map(|r| r.into());

        let imm: u8 = match mode {
            RoundingMode::Nearest => 0x00,
            RoundingMode::Down => 0x01,
            RoundingMode::Up => 0x02,
            RoundingMode::Zero => 0x03,
        };

        let inst = match size {
            OperandSize::S32 => asm::inst::roundss_rmi::new(dst, src, imm).into(),
            OperandSize::S64 => asm::inst::roundsd_rmi::new(dst, src, imm).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::External { inst });
    }

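    /// Computes the square root of the scalar float in `src`, placing the
    /// result in `dst`.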
    pub fn sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        use OperandSize::*;
        let dst = pair_xmm(dst);
        let inst = match size {
            S32 => asm::inst::sqrtss_a::new(dst, src).into(),
            S64 => asm::inst::sqrtsd_a::new(dst, src).into(),
            S8 | S16 | S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Emit a call to an unknown location through a register.
    pub fn call_with_reg(&mut self, cc: CallingConvention, callee: Reg) {
        self.emit(Inst::CallUnknown {
            info: Box::new(CallInfo::empty(RegMem::reg(callee.into()), cc.into())),
        });
    }

    /// Emit a call to a locally defined function through an index.
    pub fn call_with_name(&mut self, cc: CallingConvention, name: UserExternalNameRef) {
        self.emit(Inst::CallKnown {
            info: Box::new(CallInfo::empty(ExternalName::user(name), cc.into())),
        });
    }

    /// Emits a conditional jump to the given label.
    pub fn jmp_if(&mut self, cc: impl Into<CC>, taken: MachLabel) {
        self.emit(Inst::WinchJmpIf {
            cc: cc.into(),
            taken,
        });
    }

    /// Performs an unconditional jump to the given label.
    pub fn jmp(&mut self, target: MachLabel) {
        self.emit(Inst::JmpKnown { dst: target });
    }

    /// Emits a jump table sequence.
    pub fn jmp_table(
        &mut self,
        targets: SmallVec<[MachLabel; 4]>,
        default: MachLabel,
        index: Reg,
        tmp1: Reg,
        tmp2: Reg,
    ) {
        self.emit(Inst::JmpTableSeq {
            idx: index.into(),
            tmp1: Writable::from_reg(tmp1.into()),
            tmp2: Writable::from_reg(tmp2.into()),
            default_target: default,
            targets: Box::new(targets.to_vec()),
        })
    }

    /// Emit a trap instruction.
    pub fn trap(&mut self, code: TrapCode) {
        let inst = asm::inst::ud2_zo::new(code).into();
        self.emit(Inst::External { inst });
    }

    /// Conditional trap.
    pub fn trapif(&mut self, cc: impl Into<CC>, trap_code: TrapCode) {
        self.emit(Inst::TrapIf {
            cc: cc.into(),
            trap_code,
        });
    }

    /// Load effective address.
    pub fn lea(&mut self, addr: &Address, dst: WritableReg, size: OperandSize) {
        let addr = Self::to_synthetic_amode(addr, MemFlags::trusted());
        let dst: WritableGpr = dst.map(Into::into);
        let inst = match size {
            OperandSize::S16 => asm::inst::leaw_rm::new(dst, addr).into(),
            OperandSize::S32 => asm::inst::leal_rm::new(dst, addr).into(),
            OperandSize::S64 => asm::inst::leaq_rm::new(dst, addr).into(),
            OperandSize::S8 | OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

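    /// Add with carry: computes `dst = dst + src + CF`, consuming the carry
    /// flag produced by a preceding instruction.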
    pub fn adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::adcb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::adcw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::adcl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::adcq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

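    /// Subtract with borrow: computes `dst = dst - (src + CF)`, consuming the
    /// carry flag produced by a preceding instruction.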
    pub fn sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::sbbb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::sbbw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::sbbl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::sbbq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

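    /// Widening multiply: multiplies `lhs` by `rhs` and splits the
    /// double-width product between `dst_lo` and `dst_hi`. The hardware fixes
    /// the operands to RAX (and RDX for the high half), which is why they are
    /// wrapped in `asm::Fixed` below.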
    pub fn mul_wide(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: MulWideKind,
        size: OperandSize,
    ) {
        use MulWideKind::*;
        use OperandSize::*;
        let rax = asm::Fixed(PairedGpr {
            read: lhs.into(),
            write: WritableGpr::from_reg(dst_lo.to_reg().into()),
        });
        let rdx = asm::Fixed(dst_hi.to_reg().into());
        if size == S8 {
            // For `mulb` and `imulb`, both the high and low bits are written to
            // RAX.
            assert_eq!(dst_lo, dst_hi);
        }
        let inst = match (size, kind) {
            (S8, Unsigned) => asm::inst::mulb_m::new(rax, rhs).into(),
            (S8, Signed) => asm::inst::imulb_m::new(rax, rhs).into(),
            (S16, Unsigned) => asm::inst::mulw_m::new(rax, rdx, rhs).into(),
            (S16, Signed) => asm::inst::imulw_m::new(rax, rdx, rhs).into(),
            (S32, Unsigned) => asm::inst::mull_m::new(rax, rdx, rhs).into(),
            (S32, Signed) => asm::inst::imull_m::new(rax, rdx, rhs).into(),
            (S64, Unsigned) => asm::inst::mulq_m::new(rax, rdx, rhs).into(),
            (S64, Signed) => asm::inst::imulq_m::new(rax, rdx, rhs).into(),
            (S128, _) => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shuffles bytes in `src` according to the contents of `mask` and puts
    /// the result in `dst`.
    pub fn xmm_vpshufb_rrm(&mut self, dst: WritableReg, src: Reg, mask: &Address) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let mask = Self::to_synthetic_amode(mask, MemFlags::trusted());
        let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
        self.emit(Inst::External { inst });
    }

    /// Shuffles bytes in `src` according to the contents of `mask` and puts
    /// the result in `dst`.
    pub fn xmm_vpshufb_rrr(&mut self, dst: WritableReg, src: Reg, mask: Reg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
        self.emit(Inst::External { inst });
    }

    /// Add unsigned integers with unsigned saturation.
    ///
    /// Adds the src operands, but when a lane's result would exceed the
    /// maximum unsigned value for the lane width, the saturated value (0xFF
    /// for bytes, 0xFFFF for words) is written instead.
    pub fn xmm_vpaddus_rrm(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: &Address,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add unsigned integers with unsigned saturation.
    ///
    /// Adds the src operands, but when a lane's result would exceed the
    /// maximum unsigned value for the lane width, the saturated value (0xFF
    /// for bytes, 0xFFFF for words) is written instead.
    pub fn xmm_vpaddus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add signed integers with signed saturation.
    pub fn xmm_vpadds_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddsb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpaddsw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

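    /// Adds the vector of integers in `src1` to the vector at memory address
    /// `src2` and puts the results in `dst`.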
    pub fn xmm_vpadd_rmr(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, address).into(),
            OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, address).into(),
            OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Adds vectors of integers in `src1` and `src2` and puts the results in
    /// `dst`.
    pub fn xmm_vpadd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, src2).into(),
            OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vpaddq_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

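    /// Emits an `mfence` instruction, ordering all prior loads and stores
    /// before any subsequent memory operations.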
    pub fn mfence(&mut self) {
        self.emit(Inst::External {
            inst: asm::inst::mfence_zo::new().into(),
        });
    }

    /// Extract the value determined by `lane` from `src` into memory at `addr`.
    pub(crate) fn xmm_vpextr_rm(
        &mut self,
        addr: &Address,
        src: Reg,
        lane: u8,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(addr, flags);
        let inst = match size {
            OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
            OperandSize::S16 => asm::inst::vpextrw_b::new(dst, src, lane).into(),
            OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
            OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Extract the value determined by `lane` from `src` into `dst` (zero extended).
    pub fn xmm_vpextr_rr(&mut self, dst: WritableReg, src: Reg, lane: u8, size: OperandSize) {
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
            OperandSize::S16 => asm::inst::vpextrw_a::new(dst, src, lane).into(),
            OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
            OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Copy the value at memory address `src2` into the lane of `src1`
    /// specified by `count`, placing the result in `dst`.
    pub fn xmm_vpinsr_rrm(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: &Address,
        count: u8,
        size: OperandSize,
    ) {
        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let dst: WritableXmm = dst.map(|r| r.into());

        let inst = match size {
            OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
            OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
            OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
            OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
            OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Copy the value in `src2` into the lane of `src1` specified by `count`,
    /// placing the result in `dst`.
    pub fn xmm_vpinsr_rrr(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: Reg,
        count: u8,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
            OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
            OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
            OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
            OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Copy a 32-bit float from memory at `address`, merge it into `src1`,
    /// and put the result in `dst`.
    pub fn xmm_vinsertps_rrm(&mut self, dst: WritableReg, src1: Reg, address: &Address, imm: u8) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vinsertps_b::new(dst, src1, address, imm).into();
        self.emit(Inst::External { inst });
    }

    /// Copy a 32-bit float from `src2`, merge it into `src1`, and put the
    /// result in `dst`.
    pub fn xmm_vinsertps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, imm: u8) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vinsertps_b::new(dst, src1, src2, imm).into();
        self.emit(Inst::External { inst });
    }

    /// Moves the lower 64-bit float in `src2` into the lower 64 bits of `dst`
    /// and the upper 64 bits of `src1` into the upper 64 bits of `dst`.
    pub fn xmm_vmovsd_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovsd_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Moves a 64-bit float from `src` into the lower 64 bits of `dst`,
    /// zeroing out the upper 64 bits of `dst`.
    pub fn xmm_vmovsd_rm(&mut self, dst: WritableReg, src: &Address) {
        let src = Self::to_synthetic_amode(src, MemFlags::trusted());
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovsd_d::new(dst, src).into();
        self.emit(Inst::External { inst });
    }

    /// Moves two 32-bit floats from memory at `src2` to the upper 64 bits of
    /// `dst`. Copies two 32-bit floats from the lower 64 bits of `src1` to
    /// the lower 64 bits of `dst`.
    pub fn xmm_vmovlhps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address) {
        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovhps_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Moves two 32-bit floats from the lower 64 bits of `src2` to the upper
    /// 64 bits of `dst`. Copies two 32-bit floats from the lower 64 bits of
    /// `src1` to the lower 64 bits of `dst`.
    pub fn xmm_vmovlhps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovlhps_rvm::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Move unaligned packed integer values from address `src` to `dst`.
    pub fn xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags) {
        let src = Self::to_synthetic_amode(src, flags);
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovdqu_a::new(dst, src).into();
        self.emit(Inst::External { inst });
    }

    /// Move an integer from `src` to the xmm register `dst` using an AVX
    /// instruction.
    pub fn avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vmovd_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::vmovq_a::new(dst, src).into(),
            _ => unreachable!(),
        };

        self.emit(Inst::External { inst });
    }

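    /// Emits `vptest`, a bitwise test that sets ZF when the AND of the two
    /// operands is all zeros (and CF based on the AND-NOT) without modifying
    /// either register.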
    pub fn xmm_vptest(&mut self, src1: Reg, src2: Reg) {
        let inst = asm::inst::vptest_rm::new(src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Converts vectors between integer and floating-point formats (and
    /// between float widths) according to `kind`.
    pub fn xmm_vcvt_rr(&mut self, src: Reg, dst: WritableReg, kind: VcvtKind) {
        let dst: WritableXmm = dst.map(|x| x.into());
        let inst = match kind {
            VcvtKind::I32ToF32 => asm::inst::vcvtdq2ps_a::new(dst, src).into(),
            VcvtKind::I32ToF64 => asm::inst::vcvtdq2pd_a::new(dst, src).into(),
            VcvtKind::F64ToF32 => asm::inst::vcvtpd2ps_a::new(dst, src).into(),
            VcvtKind::F64ToI32 => asm::inst::vcvttpd2dq_a::new(dst, src).into(),
            VcvtKind::F32ToF64 => asm::inst::vcvtps2pd_a::new(dst, src).into(),
            VcvtKind::F32ToI32 => asm::inst::vcvttps2dq_a::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtracts the floats in vector `src2` from the floats in vector `src1`
    /// and puts the results in `dst`.
    pub fn xmm_vsubp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtracts the integers in vector `src2` from the integers in vector
    /// `src1` and puts the results in `dst`.
    pub fn xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpsubb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpsubw_b::new(dst, src1, src2).into(),
            OperandSize::S32 => asm::inst::vpsubd_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vpsubq_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtract unsigned integers with unsigned saturation.
    pub fn xmm_vpsubus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpsubusb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpsubusw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtract signed integers with signed saturation.
    pub fn xmm_vpsubs_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpsubsb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpsubsw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add the floats in vector `src1` to the floats in the vector at memory
    /// address `src2` and put the results in `dst`.
    pub fn xmm_vaddp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, address).into(),
            OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add the floats in vector `src1` to the floats in vector `src2` and put
    /// the results in `dst`.
    pub fn xmm_vaddp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Compare vector register `lhs` with the vector of integers at memory
    /// address `address` for equality between packed integers and write the
    /// resulting vector into `dst`.
    pub fn xmm_vpcmpeq_rrm(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        address: &Address,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, address).into(),
            OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, address).into(),
            OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, address).into(),
            OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Compare vector registers `lhs` and `rhs` for equality between packed
    /// integers and write the resulting vector into `dst`.
    pub fn xmm_vpcmpeq_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, rhs).into(),
            OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a greater than comparison with vectors of signed integers in
    /// `lhs` and `rhs` and puts the results in `dst`.
    pub fn xmm_vpcmpgt_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpcmpgtb_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpcmpgtw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpcmpgtd_b::new(dst, lhs, rhs).into(),
            OperandSize::S64 => asm::inst::vpcmpgtq_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a max operation with vectors of signed integers in `lhs` and
    /// `rhs` and puts the results in `dst`.
    pub fn xmm_vpmaxs_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpmaxsb_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpmaxsw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpmaxsd_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a max operation with vectors of unsigned integers in `lhs` and
    /// `rhs` and puts the results in `dst`.
    pub fn xmm_vpmaxu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpmaxub_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpmaxuw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpmaxud_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a min operation with vectors of signed integers in `lhs` and
    /// `rhs` and puts the results in `dst`.
    pub fn xmm_vpmins_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpminsb_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpminsw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpminsd_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a min operation with vectors of unsigned integers in `lhs` and
    /// `rhs` and puts the results in `dst`.
    pub fn xmm_vpminu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpminub_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpminuw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpminud_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a comparison operation between vectors of floats in `lhs` and
    /// `rhs` and puts the results in `dst`.
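    ///
    /// The comparison `kind` is lowered to the standard `cmpps`/`cmppd`
    /// predicate immediate (0 = EQ, 1 = LT, 2 = LE, 3 = UNORD, 4 = NEQ).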
    pub fn xmm_vcmpp_rrr(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        size: OperandSize,
        kind: VcmpKind,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = match kind {
            VcmpKind::Eq => 0,
            VcmpKind::Lt => 1,
            VcmpKind::Le => 2,
            VcmpKind::Unord => 3,
            VcmpKind::Ne => 4,
        };
        let inst = match size {
            OperandSize::S32 => asm::inst::vcmpps_b::new(dst, lhs, rhs, imm).into(),
            OperandSize::S64 => asm::inst::vcmppd_b::new(dst, lhs, rhs, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a subtraction on the vector of floats in `src1` and the
    /// vector at memory address `src2` and puts the results in `dst`.
    pub fn xmm_vsub_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a subtraction on two vectors of floats and puts the results in
    /// `dst`.
    pub fn xmm_vsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Converts a vector of signed integers into a vector of narrower integers
    /// using saturation to handle overflow.
    pub fn xmm_vpackss_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpacksswb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpackssdw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Converts a vector of unsigned integers into a vector of narrower
    /// integers using saturation to handle overflow.
    pub fn xmm_vpackus_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpackuswb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpackusdw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Concatenates `src1` and `src2`, shifts the result right by `imm`
    /// bytes, and puts the result in `dst`.
    pub fn xmm_vpalignr_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpalignr_b::new(dst, src1, src2, imm).into();
        self.emit(Inst::External { inst });
    }

    /// Takes the lower lanes of the vector of floats in `src1` and the vector
    /// at memory address `src2` and interleaves them in `dst`.
    pub fn xmm_vunpcklp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vunpcklps_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Unpacks and interleaves the high order data of the floats in `src1`
    /// and `src2` and puts the results in `dst`.
    pub fn xmm_vunpckhp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vunpckhps_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Unpacks and interleaves the lower lanes of the vectors of integers in
    /// `src1` and `src2` and puts the results in `dst`.
    pub fn xmm_vpunpckl_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpunpcklbw_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpunpcklwd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Unpacks and interleaves the higher lanes of the vectors of integers in
    /// `src1` and `src2` and puts the results in `dst`.
    pub fn xmm_vpunpckh_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpunpckhbw_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpunpckhwd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

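    /// Multiplies packed 64-bit integers in `src1` and `src2` and stores the
    /// low 64 bits of each product in `dst`.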
    pub(crate) fn vpmullq(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmullq_c::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Creates a mask made up of the most significant bit of each byte of
    /// `src` and stores the result in `dst`.
    pub fn xmm_vpmovmsk_rr(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        assert_eq!(dst_size, OperandSize::S32);
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match src_size {
            OperandSize::S8 => asm::inst::vpmovmskb_rm::new(dst, src).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Creates a mask made up of the most significant bit of each float lane
    /// in `src` and stores the result in `dst`.
    pub fn xmm_vmovskp_rr(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        assert_eq!(dst_size, OperandSize::S32);
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match src_size {
            OperandSize::S32 => asm::inst::vmovmskps_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::vmovmskpd_rm::new(dst, src).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Compute the absolute value of the elements in vector `src` and put the
    /// results in `dst`.
    pub fn xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpabsb_a::new(dst, src).into(),
            OperandSize::S16 => asm::inst::vpabsw_a::new(dst, src).into(),
            OperandSize::S32 => asm::inst::vpabsd_a::new(dst, src).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Arithmetic (sign preserving) right shift on the vector in `src` by
    /// `amount` with the result written to `dst`.
    pub fn xmm_vpsra_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpsraw_c::new(dst, src, amount).into(),
            OperandSize::S32 => asm::inst::vpsrad_c::new(dst, src, amount).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Arithmetic (sign preserving) right shift on the vector in `src` by
    /// `imm` with the result written to `dst`.
    pub fn xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
        let inst = match size {
            OperandSize::S32 => asm::inst::vpsrad_d::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shift vector data left by `imm`.
    pub fn xmm_vpsll_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
        let inst = match size {
            OperandSize::S32 => asm::inst::vpslld_d::new(dst, src, imm).into(),
            OperandSize::S64 => asm::inst::vpsllq_d::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shift vector data left by `amount`.
    pub fn xmm_vpsll_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpsllw_c::new(dst, src, amount).into(),
            OperandSize::S32 => asm::inst::vpslld_c::new(dst, src, amount).into(),
            OperandSize::S64 => asm::inst::vpsllq_c::new(dst, src, amount).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shift vector data right by `imm`.
    pub fn xmm_vpsrl_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
        let inst = match size {
            OperandSize::S16 => asm::inst::vpsrlw_d::new(dst, src, imm).into(),
            OperandSize::S32 => asm::inst::vpsrld_d::new(dst, src, imm).into(),
            OperandSize::S64 => asm::inst::vpsrlq_d::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shift vector data right by `amount`.
    pub fn xmm_vpsrl_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpsrlw_c::new(dst, src, amount).into(),
            OperandSize::S32 => asm::inst::vpsrld_c::new(dst, src, amount).into(),
            OperandSize::S64 => asm::inst::vpsrlq_c::new(dst, src, amount).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform an `and` operation on the vector of floats in `src1` and the
    /// vector at memory address `src2` and put the results in `dst`.
    pub fn xmm_vandp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, address).into(),
            OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform an `and` operation on the vectors of floats in `src1` and
    /// `src2` and put the results in `dst`.
    pub fn xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a bitwise `and` operation on the vector in `src1` and the
    /// vector at memory address `src2` and stores the results in `dst`.
    pub fn xmm_vpand_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = asm::inst::vpand_b::new(dst, src1, address).into();
        self.emit(Inst::External { inst });
    }

    /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
    /// and stores the results in `dst`.
    pub fn xmm_vpand_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpand_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Perform an `and not` operation (`!src1 & src2`) on the vectors of
    /// floats in `src1` and `src2` and put the results in `dst`.
    pub fn xmm_vandnp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vandnps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vandnpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform an `and not` operation (`!src1 & src2`) on the vectors in
    /// `src1` and `src2` and put the results in `dst`.
    pub fn xmm_vpandn_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpandn_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Perform an `or` operation on the vectors of floats in `src1` and
    /// `src2` and put the results in `dst`.
    pub fn xmm_vorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vorps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vorpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Bitwise OR of `src1` and `src2`.
    pub fn xmm_vpor_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpor_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Bitwise logical xor of the vectors of floats in `src1` and `src2`,
    /// putting the results in `dst`.
    pub fn xmm_vxorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vxorps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vxorpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform a logical xor on the vector in `src` and the vector at memory
    /// address `address` and put the results in `dst`.
    pub fn xmm_vpxor_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpxor_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }

    /// Perform a logical xor on the vectors in `src1` and `src2` and put the
    /// results in `dst`.
    pub fn xmm_vpxor_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpxor_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Perform a max operation across two vectors of floats and put the
    /// results in `dst`.
    pub fn xmm_vmaxp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vmaxps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vmaxpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform a min operation on the vector of floats in `src1` and the
    /// vector at memory address `src2` and put the results in `dst`.
    pub fn xmm_vminp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, address).into(),
            OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform a min operation across two vectors of floats and put the
    /// results in `dst`.
    pub fn xmm_vminp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Round a vector of floats according to `mode`.
    pub fn xmm_vroundp_rri(
        &mut self,
        src: Reg,
        dst: WritableReg,
        mode: VroundMode,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = match mode {
            VroundMode::TowardNearest => 0,
            VroundMode::TowardNegativeInfinity => 1,
            VroundMode::TowardPositiveInfinity => 2,
            VroundMode::TowardZero => 3,
        };

        let inst = match size {
            OperandSize::S32 => asm::inst::vroundps_rmi::new(dst, src, imm).into(),
            OperandSize::S64 => asm::inst::vroundpd_rmi::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Shuffle vectors of floats.
    pub fn xmm_vshufp_rrri(
        &mut self,
        src1: Reg,
        src2: Reg,
        dst: WritableReg,
        imm: u8,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vshufps_b::new(dst, src1, src2, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Each lane in `src1` is multiplied by the corresponding lane in `src2`,
    /// producing intermediate 32-bit results. Each intermediate result is
    /// truncated to its 18 most significant bits, and rounding is performed
    /// by adding 1 to the least significant bit of each 18-bit intermediate
    /// result. The 16 bits immediately to the right of the most significant
    /// bit of each 18-bit intermediate result are placed in the corresponding
    /// lane of `dst`.
    pub fn xmm_vpmulhrs_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpmulhrsw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

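    /// Multiplies the low signed 32-bit integers of each 64-bit lane in
    /// `src1` and `src2`, producing signed 64-bit products in `dst`.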
    pub fn xmm_vpmuldq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmuldq_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

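    /// Multiplies the low unsigned 32-bit integers of each 64-bit lane in
    /// `src1` and `src2`, producing unsigned 64-bit products in `dst`.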
    pub fn xmm_vpmuludq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmuludq_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

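    /// Multiplies packed integers in `src1` and `src2` and stores the low
    /// half of each product in `dst`.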
    pub fn xmm_vpmull_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpmullw_b::new(dst, src1, src2).into(),
            OperandSize::S32 => asm::inst::vpmulld_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

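    /// Multiplies the vectors of floats in `src1` and `src2` and puts the
    /// results in `dst`.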
    pub fn xmm_vmulp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vmulps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vmulpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform an average operation on the vectors of unsigned integers in
    /// `src1` and `src2` and put the results in `dst`.
    pub fn xmm_vpavg_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpavgb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpavgw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Divide the vector of floats in `src1` by the vector of floats in `src2`
    /// and put the results in `dst`.
    pub fn xmm_vdivp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vdivps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vdivpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Compute the square roots of the vector of floats in `src` and put the
    /// results in `dst`.
    pub fn xmm_vsqrtp_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vsqrtps_b::new(dst, src).into(),
            OperandSize::S64 => asm::inst::vsqrtpd_b::new(dst, src).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Multiply and add packed signed and unsigned bytes.
    pub fn xmm_vpmaddubsw_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpmaddubsw_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }

    /// Multiply and add packed signed and unsigned bytes.
    pub fn xmm_vpmaddubsw_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmaddubsw_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Multiply and add packed integers.
    pub fn xmm_vpmaddwd_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpmaddwd_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }

    /// Multiply and add packed integers.
    pub fn xmm_vpmaddwd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmaddwd_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
}

/// Captures the region in a [`MachBuffer`] where an add-with-immediate
/// instruction would be emitted, but the immediate is not yet known.
/// Currently, this implementation expects a 32-bit immediate, so 8 and 16-bit
/// operand sizes are not supported.
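///
/// A minimal usage sketch (the surrounding driver code here is hypothetical,
/// for illustration only): reserve the patchable add while the final
/// immediate is still unknown, then patch it in once it is.
///
/// ```text
/// let patch = PatchableAddToReg::new(reg, OperandSize::S64, &mut asm);
/// // ... emit the rest of the function; the final value becomes known ...
/// patch.finalize(value, asm.buffer_mut());
/// ```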
pub(crate) struct PatchableAddToReg {
    /// The region to be patched in the [`MachBuffer`]. It must contain a valid
    /// add instruction sequence, accepting a 32-bit immediate.
    region: PatchRegion,

    /// The offset into the patchable region where the patchable constant begins.
    constant_offset: usize,
}

impl PatchableAddToReg {
    /// Create a new [`PatchableAddToReg`] by capturing a region in the output
    /// buffer where the add-with-immediate occurs. The [`MachBuffer`] will
    /// have an add-with-immediate instruction present in that region, though
    /// it will add `0` until the [`Self::finalize`] method is called.
    ///
    /// Currently this implementation expects to be able to patch a 32-bit
    /// immediate, which means that 8 and 16-bit addition cannot be supported.
    pub(crate) fn new(reg: Reg, size: OperandSize, asm: &mut Assembler) -> Self {
        let open = asm.buffer_mut().start_patchable();
        let start = asm.buffer().cur_offset();

        // Emit the opcode and register use for the add instruction.
        let reg = pair_gpr(Writable::from_reg(reg));
        let inst = match size {
            OperandSize::S32 => asm::inst::addl_mi::new(reg, 0_u32).into(),
            OperandSize::S64 => asm::inst::addq_mi_sxl::new(reg, 0_i32).into(),
            _ => {
                panic!(
                    "{}-bit addition is not supported, please see the comment on PatchableAddToReg::new",
                    size.num_bits(),
                )
            }
        };
        asm.emit(Inst::External { inst });

        // The offset to the constant is the width of what was just emitted
        // minus 4, the width of the 32-bit immediate.
        let constant_offset = usize::try_from(asm.buffer().cur_offset() - start - 4).unwrap();

        let region = asm.buffer_mut().end_patchable(open);

        Self {
            region,
            constant_offset,
        }
    }

    /// Patch the [`MachBuffer`] with the known constant to be added to the
    /// register. The final value is passed in as an `i32`, but the instruction
    /// encoding is fixed when [`PatchableAddToReg::new`] is called.
    pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
        let slice = self.region.patch(buffer);
        debug_assert_eq!(slice.len(), self.constant_offset + 4);
        slice[self.constant_offset..].copy_from_slice(val.to_le_bytes().as_slice());
    }
}