GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/pulley/src/interp.rs
1
//! Interpretation of pulley bytecode.
2
3
use crate::decode::*;
4
use crate::encode::Encode;
5
use crate::imms::*;
6
use crate::profile::{ExecutingPc, ExecutingPcRef};
7
use crate::regs::*;
8
use alloc::string::ToString;
9
use alloc::vec::Vec;
10
use core::fmt;
11
use core::mem;
12
use core::ops::ControlFlow;
13
use core::ops::{Index, IndexMut};
14
use core::ptr::NonNull;
15
use pulley_macros::interp_disable_if_cfg;
16
use wasmtime_math::{WasmFloat, f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};
17
18
mod debug;
19
#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
20
mod match_loop;
21
#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
22
mod tail_loop;
23
24
const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB
25
26
/// A virtual machine for interpreting Pulley bytecode.
27
pub struct Vm {
28
state: MachineState,
29
executing_pc: ExecutingPc,
30
}
31
32
impl Default for Vm {
33
fn default() -> Self {
34
Vm::new()
35
}
36
}
37
38
impl Vm {
39
/// Create a new virtual machine with the default stack size.
40
pub fn new() -> Self {
41
Self::with_stack(DEFAULT_STACK_SIZE)
42
}
43
44
/// Create a new virtual machine with the given stack size.
45
pub fn with_stack(stack_size: usize) -> Self {
46
Self {
47
state: MachineState::with_stack(stack_size),
48
executing_pc: ExecutingPc::default(),
49
}
50
}
51
52
/// Get a shared reference to this VM's machine state.
53
pub fn state(&self) -> &MachineState {
54
&self.state
55
}
56
57
/// Get an exclusive reference to this VM's machine state.
58
pub fn state_mut(&mut self) -> &mut MachineState {
59
&mut self.state
60
}
61
62
/// Call a bytecode function.
63
///
64
/// The given `func` must point to the beginning of a valid Pulley bytecode
65
/// function.
66
///
67
/// The given `args` must match the number and type of arguments that
68
/// function expects.
69
///
70
/// The given `rets` must match the function's actual return types.
71
///
72
/// Returns either the resulting values, or the PC at which a trap was
73
/// raised.
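///
/// # Example
///
/// A minimal sketch of the intended usage (not compiled or run here since it
/// needs real Pulley bytecode; `func` is assumed to point at a function
/// taking one `x`-register argument and returning one `x`-register result):
///
/// ```ignore
/// let mut vm = Vm::new();
/// let args = [Val::from(42_u64)];
/// match unsafe { vm.call(func, &args, [RegType::XReg]) } {
///     DoneReason::ReturnToHost(rets) => {
///         for ret in rets {
///             // inspect each returned `Val` here
///         }
///     }
///     DoneReason::Trap { pc, kind } => {
///         // execution trapped at `pc`, optionally with a known `kind`
///     }
///     DoneReason::CallIndirectHost { id, resume } => {
///         // the bytecode requested a host call with payload `id`
///     }
/// }
/// ```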
74
pub unsafe fn call<'a, T>(
75
&'a mut self,
76
func: NonNull<u8>,
77
args: &[Val],
78
rets: T,
79
) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
80
where
81
T: IntoIterator<Item = RegType> + 'a,
82
{
83
unsafe {
84
let lr = self.call_start(args);
85
86
match self.call_run(func) {
87
DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
88
DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
89
DoneReason::CallIndirectHost { id, resume } => {
90
DoneReason::CallIndirectHost { id, resume }
91
}
92
}
93
}
94
}
95
96
/// Performs the initial part of [`Vm::call`] in setting up the `args`
97
/// provided in registers according to Pulley's ABI.
98
///
99
/// # Return
100
///
101
/// Returns the old `lr` register value. The current `lr` value is replaced
102
/// with a sentinel that triggers a return to the host when returned-to.
103
///
104
/// # Unsafety
105
///
106
/// All the same unsafety as `call` and additionally, you must
107
/// invoke `call_run` and then `call_end` after calling `call_start`.
108
/// If you don't want to wrangle these invocations, use `call` instead
109
/// of `call_{start,run,end}`.
110
pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
111
// NB: make sure this method stays in sync with
112
// `PulleyMachineDeps::compute_arg_locs`!
113
114
let mut x_args = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
115
let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
116
#[cfg(not(pulley_disable_interp_simd))]
117
let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
118
119
for arg in args {
120
match arg {
121
Val::XReg(val) => match x_args.next() {
122
Some(reg) => self.state[reg] = *val,
123
None => todo!("stack slots"),
124
},
125
Val::FReg(val) => match f_args.next() {
126
Some(reg) => self.state[reg] = *val,
127
None => todo!("stack slots"),
128
},
129
#[cfg(not(pulley_disable_interp_simd))]
130
Val::VReg(val) => match v_args.next() {
131
Some(reg) => self.state[reg] = *val,
132
None => todo!("stack slots"),
133
},
134
}
135
}
136
137
mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
138
}
139
140
/// Performs the internal part of [`Vm::call`] where bytecode is actually
141
/// executed.
142
///
143
/// # Unsafety
144
///
145
/// In addition to all the invariants documented for `call`, you
146
/// may only invoke `call_run` after invoking `call_start` to
147
/// initialize this call's arguments.
148
pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
149
self.state.debug_assert_done_reason_none();
150
let interpreter = Interpreter {
151
state: &mut self.state,
152
pc: unsafe { UnsafeBytecodeStream::new(pc) },
153
executing_pc: self.executing_pc.as_ref(),
154
};
155
let done = interpreter.run();
156
self.state.done_decode(done)
157
}
158
159
/// Performs the tail end of [`Vm::call`] by returning the values as
160
/// determined by `rets` according to Pulley's ABI.
161
///
162
/// The `old_ret` value should have been provided from `call_start`
163
/// previously.
164
///
165
/// # Unsafety
166
///
167
/// In addition to the invariants documented for `call`, this may
168
/// only be called after `call_run`.
169
pub unsafe fn call_end<'a>(
170
&'a mut self,
171
old_ret: *mut u8,
172
rets: impl IntoIterator<Item = RegType> + 'a,
173
) -> impl Iterator<Item = Val> + 'a {
174
self.state.lr = old_ret;
175
// NB: make sure this method stays in sync with
176
// `PulleyMachineDeps::compute_arg_locs`!
177
178
let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
179
let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
180
#[cfg(not(pulley_disable_interp_simd))]
181
let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
182
183
rets.into_iter().map(move |ty| match ty {
184
RegType::XReg => match x_rets.next() {
185
Some(reg) => Val::XReg(self.state[reg]),
186
None => todo!("stack slots"),
187
},
188
RegType::FReg => match f_rets.next() {
189
Some(reg) => Val::FReg(self.state[reg]),
190
None => todo!("stack slots"),
191
},
192
#[cfg(not(pulley_disable_interp_simd))]
193
RegType::VReg => match v_rets.next() {
194
Some(reg) => Val::VReg(self.state[reg]),
195
None => todo!("stack slots"),
196
},
197
#[cfg(pulley_disable_interp_simd)]
198
RegType::VReg => panic!("simd support disabled at compile time"),
199
})
200
}
201
202
/// Returns the current `fp` register value.
203
pub fn fp(&self) -> *mut u8 {
204
self.state.fp
205
}
206
207
/// Returns the current `lr` register value.
208
pub fn lr(&self) -> *mut u8 {
209
self.state.lr
210
}
211
212
/// Sets the current `fp` register value.
213
pub unsafe fn set_fp(&mut self, fp: *mut u8) {
214
self.state.fp = fp;
215
}
216
217
/// Sets the current `lr` register value.
218
pub unsafe fn set_lr(&mut self, lr: *mut u8) {
219
self.state.lr = lr;
220
}
221
222
/// Gets a handle to the currently executing program counter for this
223
/// interpreter which can be read from other threads.
224
//
225
// Note that despite this field still existing with `not(feature =
226
// "profile")` it's hidden from the public API in that scenario as it has no
227
// methods anyway.
228
#[cfg(feature = "profile")]
229
pub fn executing_pc(&self) -> &ExecutingPc {
230
&self.executing_pc
231
}
232
}
233
234
impl Drop for Vm {
235
fn drop(&mut self) {
236
self.executing_pc.set_done();
237
}
238
}
239
240
/// The type of a register in the Pulley machine state.
241
#[derive(Clone, Copy, Debug)]
242
pub enum RegType {
243
/// An `x` register: integers.
244
XReg,
245
246
/// An `f` register: floats.
247
FReg,
248
249
/// A `v` register: vectors.
250
VReg,
251
}
252
253
/// A value that can be stored in a register.
254
#[derive(Clone, Copy, Debug)]
255
pub enum Val {
256
/// An `x` register value: integers.
257
XReg(XRegVal),
258
259
/// An `f` register value: floats.
260
FReg(FRegVal),
261
262
/// A `v` register value: vectors.
263
#[cfg(not(pulley_disable_interp_simd))]
264
VReg(VRegVal),
265
}
266
267
impl fmt::LowerHex for Val {
268
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
269
match self {
270
Val::XReg(v) => fmt::LowerHex::fmt(v, f),
271
Val::FReg(v) => fmt::LowerHex::fmt(v, f),
272
#[cfg(not(pulley_disable_interp_simd))]
273
Val::VReg(v) => fmt::LowerHex::fmt(v, f),
274
}
275
}
276
}
277
278
impl From<XRegVal> for Val {
279
fn from(value: XRegVal) -> Self {
280
Val::XReg(value)
281
}
282
}
283
284
impl From<u64> for Val {
285
fn from(value: u64) -> Self {
286
XRegVal::new_u64(value).into()
287
}
288
}
289
290
impl From<u32> for Val {
291
fn from(value: u32) -> Self {
292
XRegVal::new_u32(value).into()
293
}
294
}
295
296
impl From<i64> for Val {
297
fn from(value: i64) -> Self {
298
XRegVal::new_i64(value).into()
299
}
300
}
301
302
impl From<i32> for Val {
303
fn from(value: i32) -> Self {
304
XRegVal::new_i32(value).into()
305
}
306
}
307
308
impl<T> From<*mut T> for Val {
309
fn from(value: *mut T) -> Self {
310
XRegVal::new_ptr(value).into()
311
}
312
}
313
314
impl From<FRegVal> for Val {
315
fn from(value: FRegVal) -> Self {
316
Val::FReg(value)
317
}
318
}
319
320
impl From<f64> for Val {
321
fn from(value: f64) -> Self {
322
FRegVal::new_f64(value).into()
323
}
324
}
325
326
impl From<f32> for Val {
327
fn from(value: f32) -> Self {
328
FRegVal::new_f32(value).into()
329
}
330
}
331
332
#[cfg(not(pulley_disable_interp_simd))]
333
impl From<VRegVal> for Val {
334
fn from(value: VRegVal) -> Self {
335
Val::VReg(value)
336
}
337
}
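// Illustrative check that the `From` conversions above route plain host
// values to the matching register class.
#[test]
fn val_from_routes_to_register_class() {
assert!(matches!(Val::from(42_u64), Val::XReg(_)));
assert!(matches!(Val::from(-1_i32), Val::XReg(_)));
assert!(matches!(Val::from(1.5_f32), Val::FReg(_)));
assert!(matches!(Val::from(1.5_f64), Val::FReg(_)));
}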
338
339
/// An `x` register value: integers.
340
#[derive(Copy, Clone)]
341
pub struct XRegVal(XRegUnion);
342
343
impl PartialEq for XRegVal {
344
fn eq(&self, other: &Self) -> bool {
345
self.get_u64() == other.get_u64()
346
}
347
}
348
349
impl Eq for XRegVal {}
350
351
impl fmt::Debug for XRegVal {
352
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
353
f.debug_struct("XRegVal")
354
.field("as_u64", &self.get_u64())
355
.finish()
356
}
357
}
358
359
impl fmt::LowerHex for XRegVal {
360
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
361
fmt::LowerHex::fmt(&self.get_u64(), f)
362
}
363
}
364
365
/// Contents of an "x" register, or a general-purpose register.
366
///
367
/// This is represented as a Rust `union` to make it easier to access typed
368
/// views of this, notably the `ptr` field which enables preserving a bit of
369
/// provenance in Rust for values stored as a pointer and later read back as a pointer.
370
///
371
/// Note that the actual in-memory representation of this value is handled
372
/// carefully at this time. Pulley bytecode exposes the ability to store a
373
/// 32-bit result into a register and then read the 64-bit contents of the
374
/// register. This leaves us with the question of what to do with the upper bits
375
/// of the register when the 32-bit result is generated. Possibilities for
376
/// handling this are:
377
///
378
/// 1. Do nothing, just store the 32-bit value. The problem with this approach
379
/// is that the "upper bits" are now endianness-dependent. That means that
380
/// the state of the register is now platform-dependent.
381
/// 2. Sign or zero-extend. This restores platform-independent behavior but
382
/// requires an extra store on 32-bit platforms because they can probably
383
/// only store 32 bits at a time.
384
/// 3. Always store the values in this union as little-endian. This means that
385
/// big-endian platforms have to do a byte-swap but otherwise it has
386
/// platform-independent behavior.
387
///
388
/// This union chooses route (3) at this time where the values here are always
389
/// stored in little-endian form (even the `ptr` field). That guarantees
390
/// cross-platform behavior while also minimizing the amount of data stored on
391
/// writes.
392
///
393
/// In the future we may wish to benchmark this and possibly change this.
394
/// Technically Cranelift-generated bytecode should never rely on the upper bits
395
/// of a register if it didn't previously write them so this in theory doesn't
396
/// actually matter for Cranelift or wasm semantics. The only cost right now is
397
/// to big-endian platforms though and it's not certain how crucial performance
398
/// will be there.
399
///
400
/// One final note is that this notably contrasts with native CPUs where
401
/// native ISAs like RISC-V specifically define the entire register on every
402
/// instruction, even if only the low half contains a significant result. Pulley
403
/// is unlikely to become out-of-order within the CPU itself, as it's interpreted,
404
/// meaning that severing data-dependencies with previous operations is
405
/// hypothesized to not be too important. If this is ever a problem though it
406
/// could increase the likelihood we go for route (2) above instead (or maybe
407
/// even (1)).
408
#[derive(Copy, Clone)]
409
union XRegUnion {
410
i32: i32,
411
u32: u32,
412
i64: i64,
413
u64: u64,
414
415
// Note that this is intentionally `usize` and not an actual pointer like
416
// `*mut u8`. The reason for this is that provenance is required in Rust for
417
// pointers but Cranelift has no pointer type and thus no concept of
418
// provenance. That means that at-rest it's not known whether the value has
419
// provenance or not and basically means that Pulley is required to use
420
// "permissive provenance" in Rust as opposed to strict provenance.
421
//
422
// That's more-or-less a long-winded way of saying that storage of a pointer
423
// in this value is done with `.expose_provenance()` and reading a pointer
424
// uses `with_exposed_provenance_mut(..)`.
425
ptr: usize,
426
}
427
428
impl Default for XRegVal {
429
fn default() -> Self {
430
Self(unsafe { mem::zeroed() })
431
}
432
}
433
434
#[expect(missing_docs, reason = "self-describing methods")]
435
impl XRegVal {
436
pub fn new_i32(x: i32) -> Self {
437
let mut val = XRegVal::default();
438
val.set_i32(x);
439
val
440
}
441
442
pub fn new_u32(x: u32) -> Self {
443
let mut val = XRegVal::default();
444
val.set_u32(x);
445
val
446
}
447
448
pub fn new_i64(x: i64) -> Self {
449
let mut val = XRegVal::default();
450
val.set_i64(x);
451
val
452
}
453
454
pub fn new_u64(x: u64) -> Self {
455
let mut val = XRegVal::default();
456
val.set_u64(x);
457
val
458
}
459
460
pub fn new_ptr<T>(ptr: *mut T) -> Self {
461
let mut val = XRegVal::default();
462
val.set_ptr(ptr);
463
val
464
}
465
466
pub fn get_i32(&self) -> i32 {
467
let x = unsafe { self.0.i32 };
468
i32::from_le(x)
469
}
470
471
pub fn get_u32(&self) -> u32 {
472
let x = unsafe { self.0.u32 };
473
u32::from_le(x)
474
}
475
476
pub fn get_i64(&self) -> i64 {
477
let x = unsafe { self.0.i64 };
478
i64::from_le(x)
479
}
480
481
pub fn get_u64(&self) -> u64 {
482
let x = unsafe { self.0.u64 };
483
u64::from_le(x)
484
}
485
486
pub fn get_ptr<T>(&self) -> *mut T {
487
let ptr = unsafe { self.0.ptr };
488
core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
489
}
490
491
pub fn set_i32(&mut self, x: i32) {
492
self.0.i32 = x.to_le();
493
}
494
495
pub fn set_u32(&mut self, x: u32) {
496
self.0.u32 = x.to_le();
497
}
498
499
pub fn set_i64(&mut self, x: i64) {
500
self.0.i64 = x.to_le();
501
}
502
503
pub fn set_u64(&mut self, x: u64) {
504
self.0.u64 = x.to_le();
505
}
506
507
pub fn set_ptr<T>(&mut self, ptr: *mut T) {
508
self.0.ptr = ptr.expose_provenance().to_le();
509
}
510
}
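// A small illustrative check of the little-endian storage contract described
// above: a 32-bit write followed by a 64-bit read observes a zero-extended
// value on every host, and pointers round-trip through the exposed-provenance
// `usize` representation.
#[test]
fn xregval_little_endian_storage() {
let mut val = XRegVal::default();
val.set_u32(0x1234_5678);
assert_eq!(val.get_u32(), 0x1234_5678);
assert_eq!(val.get_u64(), 0x1234_5678);

let mut byte = 0u8;
let ptr = &mut byte as *mut u8;
let val = XRegVal::new_ptr(ptr);
assert_eq!(val.get_ptr::<u8>(), ptr);
}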
511
512
/// An `f` register value: floats.
513
#[derive(Copy, Clone)]
514
pub struct FRegVal(FRegUnion);
515
516
impl fmt::Debug for FRegVal {
517
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
518
f.debug_struct("FRegVal")
519
.field("as_f32", &self.get_f32())
520
.field("as_f64", &self.get_f64())
521
.finish()
522
}
523
}
524
525
impl fmt::LowerHex for FRegVal {
526
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
527
fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
528
}
529
}
530
531
// NB: like `XRegUnion` values here are always little-endian, see the
532
// documentation above for more details.
533
#[derive(Copy, Clone)]
534
union FRegUnion {
535
f32: u32,
536
f64: u64,
537
}
538
539
impl Default for FRegVal {
540
fn default() -> Self {
541
Self(unsafe { mem::zeroed() })
542
}
543
}
544
545
#[expect(missing_docs, reason = "self-describing methods")]
546
impl FRegVal {
547
pub fn new_f32(f: f32) -> Self {
548
let mut val = Self::default();
549
val.set_f32(f);
550
val
551
}
552
553
pub fn new_f64(f: f64) -> Self {
554
let mut val = Self::default();
555
val.set_f64(f);
556
val
557
}
558
559
pub fn get_f32(&self) -> f32 {
560
let val = unsafe { self.0.f32 };
561
f32::from_le_bytes(val.to_ne_bytes())
562
}
563
564
pub fn get_f64(&self) -> f64 {
565
let val = unsafe { self.0.f64 };
566
f64::from_le_bytes(val.to_ne_bytes())
567
}
568
569
pub fn set_f32(&mut self, val: f32) {
570
self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
571
}
572
573
pub fn set_f64(&mut self, val: f64) {
574
self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
575
}
576
}
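// Illustrative check that `f` register values round-trip exactly (including
// NaN bit patterns) through the little-endian storage noted above.
#[test]
fn fregval_bitwise_roundtrip() {
assert_eq!(FRegVal::new_f32(1.5).get_f32(), 1.5);
assert_eq!(FRegVal::new_f64(-0.25).get_f64(), -0.25);
assert_eq!(FRegVal::new_f64(f64::NAN).get_f64().to_bits(), f64::NAN.to_bits());
}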
577
578
/// A `v` register value: vectors.
579
#[derive(Copy, Clone)]
580
#[cfg(not(pulley_disable_interp_simd))]
581
pub struct VRegVal(VRegUnion);
582
583
#[cfg(not(pulley_disable_interp_simd))]
584
impl fmt::Debug for VRegVal {
585
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
586
f.debug_struct("VRegVal")
587
.field("as_u128", &unsafe { self.0.u128 })
588
.finish()
589
}
590
}
591
592
#[cfg(not(pulley_disable_interp_simd))]
593
impl fmt::LowerHex for VRegVal {
594
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
595
fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
596
}
597
}
598
599
/// 128-bit vector registers.
600
///
601
/// This register is always stored in little-endian order and has different
602
/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
603
/// union are the same width so all bits are always defined. Note that
604
/// little-endian is required though so bitcasts between different shapes of
605
/// vectors work. This union cannot be stored in big-endian form.
606
#[derive(Copy, Clone)]
607
#[repr(align(16))]
608
#[cfg(not(pulley_disable_interp_simd))]
609
union VRegUnion {
610
u128: u128,
611
i8x16: [i8; 16],
612
i16x8: [i16; 8],
613
i32x4: [i32; 4],
614
i64x2: [i64; 2],
615
u8x16: [u8; 16],
616
u16x8: [u16; 8],
617
u32x4: [u32; 4],
618
u64x2: [u64; 2],
619
// Note that these are `u32` and `u64`, not f32/f64. That's only because
620
// f32/f64 don't have `.to_le()` and `::from_le()`, so conversions go through the
621
// bits anyway.
622
f32x4: [u32; 4],
623
f64x2: [u64; 2],
624
}
625
626
#[cfg(not(pulley_disable_interp_simd))]
627
impl Default for VRegVal {
628
fn default() -> Self {
629
Self(unsafe { mem::zeroed() })
630
}
631
}
632
633
#[expect(missing_docs, reason = "self-describing methods")]
634
#[cfg(not(pulley_disable_interp_simd))]
635
impl VRegVal {
636
pub fn new_u128(i: u128) -> Self {
637
let mut val = Self::default();
638
val.set_u128(i);
639
val
640
}
641
642
pub fn get_u128(&self) -> u128 {
643
let val = unsafe { self.0.u128 };
644
u128::from_le(val)
645
}
646
647
pub fn set_u128(&mut self, val: u128) {
648
self.0.u128 = val.to_le();
649
}
650
651
fn get_i8x16(&self) -> [i8; 16] {
652
let val = unsafe { self.0.i8x16 };
653
val.map(|e| i8::from_le(e))
654
}
655
656
fn set_i8x16(&mut self, val: [i8; 16]) {
657
self.0.i8x16 = val.map(|e| e.to_le());
658
}
659
660
fn get_u8x16(&self) -> [u8; 16] {
661
let val = unsafe { self.0.u8x16 };
662
val.map(|e| u8::from_le(e))
663
}
664
665
fn set_u8x16(&mut self, val: [u8; 16]) {
666
self.0.u8x16 = val.map(|e| e.to_le());
667
}
668
669
fn get_i16x8(&self) -> [i16; 8] {
670
let val = unsafe { self.0.i16x8 };
671
val.map(|e| i16::from_le(e))
672
}
673
674
fn set_i16x8(&mut self, val: [i16; 8]) {
675
self.0.i16x8 = val.map(|e| e.to_le());
676
}
677
678
fn get_u16x8(&self) -> [u16; 8] {
679
let val = unsafe { self.0.u16x8 };
680
val.map(|e| u16::from_le(e))
681
}
682
683
fn set_u16x8(&mut self, val: [u16; 8]) {
684
self.0.u16x8 = val.map(|e| e.to_le());
685
}
686
687
fn get_i32x4(&self) -> [i32; 4] {
688
let val = unsafe { self.0.i32x4 };
689
val.map(|e| i32::from_le(e))
690
}
691
692
fn set_i32x4(&mut self, val: [i32; 4]) {
693
self.0.i32x4 = val.map(|e| e.to_le());
694
}
695
696
fn get_u32x4(&self) -> [u32; 4] {
697
let val = unsafe { self.0.u32x4 };
698
val.map(|e| u32::from_le(e))
699
}
700
701
fn set_u32x4(&mut self, val: [u32; 4]) {
702
self.0.u32x4 = val.map(|e| e.to_le());
703
}
704
705
fn get_i64x2(&self) -> [i64; 2] {
706
let val = unsafe { self.0.i64x2 };
707
val.map(|e| i64::from_le(e))
708
}
709
710
fn set_i64x2(&mut self, val: [i64; 2]) {
711
self.0.i64x2 = val.map(|e| e.to_le());
712
}
713
714
fn get_u64x2(&self) -> [u64; 2] {
715
let val = unsafe { self.0.u64x2 };
716
val.map(|e| u64::from_le(e))
717
}
718
719
fn set_u64x2(&mut self, val: [u64; 2]) {
720
self.0.u64x2 = val.map(|e| e.to_le());
721
}
722
723
fn get_f64x2(&self) -> [f64; 2] {
724
let val = unsafe { self.0.f64x2 };
725
val.map(|e| f64::from_bits(u64::from_le(e)))
726
}
727
728
fn set_f64x2(&mut self, val: [f64; 2]) {
729
self.0.f64x2 = val.map(|e| e.to_bits().to_le());
730
}
731
732
fn get_f32x4(&self) -> [f32; 4] {
733
let val = unsafe { self.0.f32x4 };
734
val.map(|e| f32::from_bits(u32::from_le(e)))
735
}
736
737
fn set_f32x4(&mut self, val: [f32; 4]) {
738
self.0.f32x4 = val.map(|e| e.to_bits().to_le());
739
}
740
}
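// Illustrative check that all lane views of a `v` register alias the same 128
// bits in little-endian order, as documented on `VRegUnion` above.
#[test]
#[cfg(not(pulley_disable_interp_simd))]
fn vregval_lane_views_share_storage() {
let val = VRegVal::new_u128(0x0f0e0d0c_0b0a0908_07060504_03020100);
assert_eq!(val.get_u128(), 0x0f0e0d0c_0b0a0908_07060504_03020100);
assert_eq!(val.get_u8x16(), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
assert_eq!(val.get_u64x2(), [0x0706_0504_0302_0100, 0x0f0e_0d0c_0b0a_0908]);
}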
741
742
/// The machine state for a Pulley virtual machine: the various registers and
743
/// stack.
744
pub struct MachineState {
745
x_regs: [XRegVal; XReg::RANGE.end as usize],
746
f_regs: [FRegVal; FReg::RANGE.end as usize],
747
#[cfg(not(pulley_disable_interp_simd))]
748
v_regs: [VRegVal; VReg::RANGE.end as usize],
749
fp: *mut u8,
750
lr: *mut u8,
751
stack: Stack,
752
done_reason: Option<DoneReason<()>>,
753
}
754
755
unsafe impl Send for MachineState {}
756
unsafe impl Sync for MachineState {}
757
758
/// Helper structure to store the state of the Pulley stack.
759
///
760
/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
761
/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
762
/// done with a custom `Vec<T>` internally where `T` has size and align of 16.
763
/// This is manually done with a helper `Align16` type below.
764
struct Stack {
765
storage: Vec<Align16>,
766
}
767
768
/// Helper type used with `Stack` above.
769
#[derive(Copy, Clone)]
770
#[repr(align(16))]
771
struct Align16 {
772
// Just here to give the structure a size of 16. The alignment is always 16
773
// regardless of what the host platform's alignment of u128 is.
774
_unused: u128,
775
}
776
777
impl Stack {
778
/// Creates a new stack which will have a byte size of at least `size`.
779
///
780
/// The allocated stack might be slightly larger due to the rounding needed to keep its size a multiple of 16.
781
fn new(size: usize) -> Stack {
782
Stack {
783
// Round up `size` to the nearest multiple of 16. Note that the
784
// stack is also allocated here but not initialized, and that's
785
// intentional as pulley bytecode should always initialize the stack
786
// before use.
787
storage: Vec::with_capacity((size + 15) / 16),
788
}
789
}
790
791
/// Returns a pointer to the top of the stack (the highest address).
792
///
793
/// Note that the returned pointer has provenance for the entire stack
794
/// allocation, however, not just the top.
795
fn top(&mut self) -> *mut u8 {
796
let len = self.len();
797
unsafe { self.base().add(len) }
798
}
799
800
/// Returns a pointer to the base of the stack (the lowest address).
801
///
802
/// Note that the returned pointer has provenance for the entire stack
803
/// allocation, however, not just the top.
804
fn base(&mut self) -> *mut u8 {
805
self.storage.as_mut_ptr().cast::<u8>()
806
}
807
808
/// Returns the length, in bytes, of this stack allocation.
809
fn len(&self) -> usize {
810
self.storage.capacity() * mem::size_of::<Align16>()
811
}
812
}
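// Illustrative check of the guarantees documented above: the base pointer is
// 16-byte aligned and the byte length is `size` rounded up to a multiple of
// 16, with `top` sitting exactly `len` bytes above `base`.
#[test]
fn stack_alignment_and_rounding() {
let mut stack = Stack::new(100);
assert_eq!(stack.base() as usize % 16, 0);
assert!(stack.len() >= 100);
assert_eq!(stack.len() % 16, 0);
assert_eq!(stack.top() as usize - stack.base() as usize, stack.len());
}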
813
814
impl fmt::Debug for MachineState {
815
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
816
let MachineState {
817
x_regs,
818
f_regs,
819
#[cfg(not(pulley_disable_interp_simd))]
820
v_regs,
821
stack: _,
822
done_reason: _,
823
fp: _,
824
lr: _,
825
} = self;
826
827
struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);
828
829
impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
830
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
831
let mut f = f.debug_map();
832
for (i, r) in self.0.iter().enumerate() {
833
f.entry(&(self.1)(i as u8), r);
834
}
835
f.finish()
836
}
837
}
838
839
let mut f = f.debug_struct("MachineState");
840
841
f.field(
842
"x_regs",
843
&RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
844
)
845
.field(
846
"f_regs",
847
&RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
848
);
849
#[cfg(not(pulley_disable_interp_simd))]
850
f.field(
851
"v_regs",
852
&RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
853
);
854
f.finish_non_exhaustive()
855
}
856
}
857
858
macro_rules! index_reg {
859
($reg_ty:ty,$value_ty:ty,$field:ident) => {
860
impl Index<$reg_ty> for Vm {
861
type Output = $value_ty;
862
863
fn index(&self, reg: $reg_ty) -> &Self::Output {
864
&self.state[reg]
865
}
866
}
867
868
impl IndexMut<$reg_ty> for Vm {
869
fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
870
&mut self.state[reg]
871
}
872
}
873
874
impl Index<$reg_ty> for MachineState {
875
type Output = $value_ty;
876
877
fn index(&self, reg: $reg_ty) -> &Self::Output {
878
&self.$field[reg.index()]
879
}
880
}
881
882
impl IndexMut<$reg_ty> for MachineState {
883
fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
884
&mut self.$field[reg.index()]
885
}
886
}
887
};
888
}
889
890
index_reg!(XReg, XRegVal, x_regs);
891
index_reg!(FReg, FRegVal, f_regs);
892
#[cfg(not(pulley_disable_interp_simd))]
893
index_reg!(VReg, VRegVal, v_regs);
894
895
/// Sentinel return address that signals the end of the call stack.
896
const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;
897
898
impl MachineState {
899
fn with_stack(stack_size: usize) -> Self {
900
let mut state = Self {
901
x_regs: [Default::default(); XReg::RANGE.end as usize],
902
f_regs: Default::default(),
903
#[cfg(not(pulley_disable_interp_simd))]
904
v_regs: Default::default(),
905
stack: Stack::new(stack_size),
906
done_reason: None,
907
fp: HOST_RETURN_ADDR,
908
lr: HOST_RETURN_ADDR,
909
};
910
911
let sp = state.stack.top();
912
state[XReg::sp] = XRegVal::new_ptr(sp);
913
914
state
915
}
916
}
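// Illustrative check of `with_stack`'s initial state: `fp` and `lr` hold the
// host-return sentinel and `sp` starts at the top of the freshly allocated
// stack.
#[test]
fn fresh_machine_state() {
let mut state = MachineState::with_stack(64);
assert_eq!(state.fp, HOST_RETURN_ADDR);
assert_eq!(state.lr, HOST_RETURN_ADDR);
let sp = state[XReg::sp].get_ptr::<u8>();
assert_eq!(sp, state.stack.top());
}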
917
918
/// Inner private module to prevent creation of the `Done` structure outside of
919
/// this module.
920
mod done {
921
use super::{Encode, Interpreter, MachineState};
922
use core::ops::ControlFlow;
923
use core::ptr::NonNull;
924
925
/// Zero-sized sentinel indicating that pulley execution has halted.
926
///
927
/// The reason for halting is stored in `MachineState`.
928
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
929
pub struct Done {
930
_priv: (),
931
}
932
933
/// Reason that the pulley interpreter has ceased execution.
934
pub enum DoneReason<T> {
935
/// A trap happened at this bytecode instruction.
936
Trap {
937
/// Which instruction is raising this trap.
938
pc: NonNull<u8>,
939
/// The kind of trap being raised, if known.
940
kind: Option<TrapKind>,
941
},
942
/// The `call_indirect_host` instruction was executed.
943
CallIndirectHost {
944
/// The payload of `call_indirect_host`.
945
id: u8,
946
/// Where to resume execution after the host has finished.
947
resume: NonNull<u8>,
948
},
949
/// Pulley has finished and the provided value is being returned.
950
ReturnToHost(T),
951
}
952
953
/// Stored within `DoneReason::Trap`.
954
#[expect(missing_docs, reason = "self-describing variants")]
955
pub enum TrapKind {
956
DivideByZero,
957
IntegerOverflow,
958
BadConversionToInteger,
959
MemoryOutOfBounds,
960
DisabledOpcode,
961
StackOverflow,
962
}
963
964
impl MachineState {
965
pub(super) fn debug_assert_done_reason_none(&mut self) {
966
debug_assert!(self.done_reason.is_none());
967
}
968
969
pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
970
self.done_reason.take().unwrap()
971
}
972
}
973
974
impl Interpreter<'_> {
975
/// Finishes execution by recording `DoneReason::Trap`.
976
///
977
/// This method takes an `I` generic parameter indicating which
978
/// instruction is executing this function and generating a trap. That's
979
/// used to go backwards from the current `pc` which is just beyond the
980
/// instruction to point to the instruction itself in the trap metadata
981
/// returned from the interpreter.
982
#[cold]
983
pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
984
self.done_trap_kind::<I>(None)
985
}
986
987
/// Same as `done_trap` but with an explicit `TrapKind`.
988
#[cold]
989
pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
990
let pc = self.current_pc::<I>();
991
self.state.done_reason = Some(DoneReason::Trap { pc, kind });
992
ControlFlow::Break(Done { _priv: () })
993
}
994
995
/// Finishes execution by recording `DoneReason::CallIndirectHost`.
996
#[cold]
997
pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
998
self.state.done_reason = Some(DoneReason::CallIndirectHost {
999
id,
1000
resume: self.pc.as_ptr(),
1001
});
1002
ControlFlow::Break(Done { _priv: () })
1003
}
1004
1005
/// Finishes execution by recording `DoneReason::ReturnToHost`.
1006
#[cold]
1007
pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
1008
self.state.done_reason = Some(DoneReason::ReturnToHost(()));
1009
ControlFlow::Break(Done { _priv: () })
1010
}
1011
}
1012
}
1013
1014
use done::Done;
1015
pub use done::{DoneReason, TrapKind};
1016
1017
struct Interpreter<'a> {
1018
state: &'a mut MachineState,
1019
pc: UnsafeBytecodeStream,
1020
executing_pc: ExecutingPcRef<'a>,
1021
}
1022
1023
impl Interpreter<'_> {
1024
/// Calculates the address that is `offset` bytes away from the start of the current instruction `I`.
1025
#[inline]
1026
fn pc_rel<I: Encode>(&mut self, offset: PcRelOffset) -> NonNull<u8> {
1027
let offset = isize::try_from(i32::from(offset)).unwrap();
1028
unsafe { self.current_pc::<I>().offset(offset) }
1029
}
1030
1031
/// Performs a relative jump of `offset` bytes from the current instruction.
1032
///
1033
/// This will jump from the start of the current instruction, identified by
1034
/// `I`, `offset` bytes away. Note that the `self.pc` at the start of this
1035
/// function actually points to the instruction after this one so `I` is
1036
/// necessary to first step back to the start of this instruction before applying `offset`.
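///
/// For example, if a branch instruction is 6 bytes wide then an encoded
/// `offset` of 6 lands on the instruction immediately after it (an effective
/// fall-through), while an `offset` of 0 re-executes the branch itself.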
1037
#[inline]
1038
fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1039
let new_pc = self.pc_rel::<I>(offset);
1040
self.pc = unsafe { UnsafeBytecodeStream::new(new_pc) };
1041
ControlFlow::Continue(())
1042
}
1043
1044
/// Returns the PC of the current instruction where `I` is the static type
1045
/// representing the current instruction.
1046
fn current_pc<I: Encode>(&self) -> NonNull<u8> {
1047
unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
1048
}
1049
1050
/// `sp -= size_of::<T>(); *sp = val;`
1051
///
1052
/// Note that `I` is the instruction which is pushing data to use if a trap
1053
/// is generated.
1054
#[must_use]
1055
fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
1056
let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
1057
self.set_sp::<I>(new_sp.cast())?;
1058
unsafe {
1059
new_sp.write_unaligned(val);
1060
}
1061
ControlFlow::Continue(())
1062
}
1063
1064
/// `ret = *sp; sp += size_of::<T>()`
1065
fn pop<T>(&mut self) -> T {
1066
let sp = self.state[XReg::sp].get_ptr::<T>();
1067
let val = unsafe { sp.read_unaligned() };
1068
self.set_sp_unchecked(sp.wrapping_add(1));
1069
val
1070
}
1071
1072
/// Sets the stack pointer to the `sp` provided.
1073
///
1074
/// Returns a trap if this would result in stack overflow, i.e. if `sp` is
1075
/// beneath the base pointer of `self.state.stack`.
1076
///
1077
/// The `I` parameter here is the instruction that is setting the stack
1078
/// pointer and is used to calculate this instruction's own `pc` if this
1079
/// instruction traps.
1080
#[must_use]
1081
fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
1082
let sp_raw = sp as usize;
1083
let base_raw = self.state.stack.base() as usize;
1084
if sp_raw < base_raw {
1085
return self.done_trap_kind::<I>(Some(TrapKind::StackOverflow));
1086
}
1087
self.set_sp_unchecked(sp);
1088
ControlFlow::Continue(())
1089
}
1090
1091
/// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
1092
/// only be used with stack increment operations such as `pop`.
1093
fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
1094
if cfg!(debug_assertions) {
1095
let sp_raw = sp as usize;
1096
let base = self.state.stack.base() as usize;
1097
let end = base + self.state.stack.len();
1098
assert!(base <= sp_raw && sp_raw <= end);
1099
}
1100
self.state[XReg::sp].set_ptr(sp);
1101
}
1102
1103
/// Loads a value of `T` using native-endian byte ordering from the `addr`
1104
/// specified.
1105
///
1106
/// The `I` type parameter is the instruction issuing this load which is
1107
/// used in case of traps to calculate the trapping pc.
1108
///
1109
/// Returns `ControlFlow::Break` if a trap happens or
1110
/// `ControlFlow::Continue` if the value was loaded successfully.
1111
///
1112
/// # Unsafety
1113
///
1114
/// Safety of this method relies on the safety of the original bytecode
1115
/// itself and correctly annotating both `T` and `I`.
1116
#[must_use]
1117
unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
1118
unsafe { addr.load_ne::<T, I>(self) }
1119
}
1120
1121
/// Stores a `val` to the `addr` specified.
1122
///
1123
/// The `I` type parameter is the instruction issuing this store which is
1124
/// used in case of traps to calculate the trapping pc.
1125
///
1126
/// Returns `ControlFlow::Break` if a trap happens or
1127
/// `ControlFlow::Continue` if the value was stored successfully.
1128
///
1129
/// # Unsafety
1130
///
1131
/// Safety of this method relies on the safety of the original bytecode
1132
/// itself and correctly annotating both `T` and `I`.
1133
#[must_use]
1134
unsafe fn store_ne<T, I: Encode>(
1135
&mut self,
1136
addr: impl AddressingMode,
1137
val: T,
1138
) -> ControlFlow<Done> {
1139
unsafe { addr.store_ne::<T, I>(self, val) }
1140
}
1141
1142
fn check_xnn_from_f32<I: Encode>(
1143
&mut self,
1144
val: f32,
1145
(lo, hi): (f32, f32),
1146
) -> ControlFlow<Done> {
1147
self.check_xnn_from_f64::<I>(val.into(), (lo.into(), hi.into()))
1148
}
1149
1150
fn check_xnn_from_f64<I: Encode>(
1151
&mut self,
1152
val: f64,
1153
(lo, hi): (f64, f64),
1154
) -> ControlFlow<Done> {
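// NaN is the only value that compares unequal to itself, so this catches NaN
// inputs and reports `BadConversionToInteger` before the range check below.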
1155
if val != val {
1156
return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
1157
}
1158
let val = val.wasm_trunc();
1159
if val <= lo || val >= hi {
1160
return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
1161
}
1162
ControlFlow::Continue(())
1163
}
1164
1165
#[cfg(not(pulley_disable_interp_simd))]
1166
fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
1167
let lo = self.state[lo].get_u64();
1168
let hi = self.state[hi].get_i64();
1169
i128::from(lo) | (i128::from(hi) << 64)
1170
}
1171
1172
#[cfg(not(pulley_disable_interp_simd))]
1173
fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
1174
self.state[lo].set_u64(val as u64);
1175
self.state[hi].set_u64((val >> 64) as u64);
1176
}
1177
1178
fn record_executing_pc_for_profiling(&mut self) {
1179
// Note that this is a no-op if `feature = "profile"` is disabled.
1180
self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
1181
}
1182
}
1183
1184
/// Helper trait to encompass the various addressing modes of Pulley.
1185
trait AddressingMode: Sized {
1186
/// Calculates the native host address `*mut T` corresponding to this
1187
/// addressing mode.
1188
///
1189
/// # Safety
1190
///
1191
/// Relies on the original bytecode being safe to execute as this will
1192
/// otherwise perform unsafe byte offsets for example which requires the
1193
/// original bytecode to be correct.
1194
#[must_use]
1195
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;
1196
1197
/// Loads a value of `T` from this address, using native-endian byte order.
1198
///
1199
/// For more information see [`Interpreter::load_ne`].
1200
#[must_use]
1201
unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
1202
let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
1203
ControlFlow::Continue(ret)
1204
}
1205
1206
/// Stores a `val` to this address, using native-endian byte order.
1207
///
1208
/// For more information see [`Interpreter::store_ne`].
1209
#[must_use]
1210
unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
1211
unsafe {
1212
self.addr::<T, I>(i)?.write_unaligned(val);
1213
}
1214
ControlFlow::Continue(())
1215
}
1216
}
1217
1218
impl AddressingMode for AddrO32 {
1219
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1220
// Note that this addressing mode cannot return `ControlFlow::Break`
1221
// which is intentional. It's expected that LLVM optimizes away any
1222
// branches callers have.
1223
unsafe {
1224
ControlFlow::Continue(
1225
i.state[self.addr]
1226
.get_ptr::<T>()
1227
.byte_offset(self.offset as isize),
1228
)
1229
}
1230
}
1231
}
1232
1233
impl AddressingMode for AddrZ {
1234
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1235
// This addressing mode defines loading/storing to the null address as
1236
// a trap, but all other addresses are allowed.
1237
let host_addr = i.state[self.addr].get_ptr::<T>();
1238
if host_addr.is_null() {
1239
i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1240
unreachable!();
1241
}
1242
unsafe {
1243
let addr = host_addr.byte_offset(self.offset as isize);
1244
ControlFlow::Continue(addr)
1245
}
1246
}
1247
}
1248
1249
impl AddressingMode for AddrG32 {
1250
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1251
// Test if `bound - offset - size_of::<T>()` is less than the wasm address to
1252
// generate a trap. It's a guarantee of this instruction that these
1253
// subtractions don't overflow.
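// For example, with `bound = 0x1_0000`, `offset = 4`, and a 4-byte `T`, the
// largest in-bounds `wasm_addr` is 0x1_0000 - 4 - 4 = 0xfff8, making the
// access cover bytes 0xfffc..0x1_0000; anything larger trips the check below.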
1254
let bound = i.state[self.host_heap_bound].get_u64() as usize;
1255
let offset = usize::from(self.offset);
1256
let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1257
if wasm_addr > bound - offset - size_of::<T>() {
1258
i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1259
unreachable!();
1260
}
1261
unsafe {
1262
let addr = i.state[self.host_heap_base]
1263
.get_ptr::<T>()
1264
.byte_add(wasm_addr)
1265
.byte_add(offset);
1266
ControlFlow::Continue(addr)
1267
}
1268
}
1269
}
1270
1271
impl AddressingMode for AddrG32Bne {
1272
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1273
// Same as `AddrG32` above except that the bound is loaded from memory.
1274
let bound = unsafe {
1275
*i.state[self.host_heap_bound_addr]
1276
.get_ptr::<usize>()
1277
.byte_add(usize::from(self.host_heap_bound_offset))
1278
};
1279
let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1280
let offset = usize::from(self.offset);
1281
if wasm_addr > bound - offset - size_of::<T>() {
1282
i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1283
unreachable!();
1284
}
1285
unsafe {
1286
let addr = i.state[self.host_heap_base]
1287
.get_ptr::<T>()
1288
.byte_add(wasm_addr)
1289
.byte_add(offset);
1290
ControlFlow::Continue(addr)
1291
}
1292
}
1293
}
1294
1295
#[test]
1296
fn simple_push_pop() {
1297
let mut state = MachineState::with_stack(16);
1298
let pc = ExecutingPc::default();
1299
unsafe {
1300
let mut bytecode = [0; 10];
1301
let mut i = Interpreter {
1302
state: &mut state,
1303
// this isn't actually read so just manufacture a dummy one
1304
pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
1305
executing_pc: pc.as_ref(),
1306
};
1307
assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
1308
assert_eq!(i.pop::<i32>(), 0_i32);
1309
assert!(i.push::<crate::Ret, _>(1_i32).is_continue());
1310
assert!(i.push::<crate::Ret, _>(2_i32).is_continue());
1311
assert!(i.push::<crate::Ret, _>(3_i32).is_continue());
1312
assert!(i.push::<crate::Ret, _>(4_i32).is_continue());
1313
assert!(i.push::<crate::Ret, _>(5_i32).is_break());
1314
assert!(i.push::<crate::Ret, _>(6_i32).is_break());
1315
assert_eq!(i.pop::<i32>(), 4_i32);
1316
assert_eq!(i.pop::<i32>(), 3_i32);
1317
assert_eq!(i.pop::<i32>(), 2_i32);
1318
assert_eq!(i.pop::<i32>(), 1_i32);
1319
}
1320
}
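// Illustrative companion to `get_i128`/`set_i128`, mirroring the dummy
// interpreter setup above: a 128-bit value is split across a lo/hi register
// pair and reassembled losslessly.
#[test]
#[cfg(not(pulley_disable_interp_simd))]
fn i128_register_pair_roundtrip() {
let mut state = MachineState::with_stack(16);
let pc = ExecutingPc::default();
unsafe {
let mut bytecode = [0; 10];
let mut i = Interpreter {
state: &mut state,
// the pc isn't read by get/set_i128, so a dummy stream suffices
pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
executing_pc: pc.as_ref(),
};
let val = -0x0123_4567_89ab_cdef_0011_2233_4455_6677_i128;
i.set_i128(XReg::x0, XReg::x1, val);
assert_eq!(i.get_i128(XReg::x0, XReg::x1), val);
assert_eq!(i.state[XReg::x0].get_u64(), val as u64);
assert_eq!(i.state[XReg::x1].get_i64(), (val >> 64) as i64);
}
}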
1321
1322
macro_rules! br_if_imm {
1323
($(
1324
fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
1325
= $camel:ident / $op:tt / $get:ident;
1326
)*) => {$(
1327
fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
1328
let a = self.state[a].$get();
1329
if a $op b.into() {
1330
self.pc_rel_jump::<crate::$camel>(offset)
1331
} else {
1332
ControlFlow::Continue(())
1333
}
1334
}
1335
)*};
1336
}
1337
1338
impl OpVisitor for Interpreter<'_> {
1339
type BytecodeStream = UnsafeBytecodeStream;
1340
type Return = ControlFlow<Done>;
1341
1342
fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
1343
&mut self.pc
1344
}
1345
1346
fn nop(&mut self) -> ControlFlow<Done> {
1347
ControlFlow::Continue(())
1348
}
1349
1350
fn ret(&mut self) -> ControlFlow<Done> {
1351
let lr = self.state.lr;
1352
if lr == HOST_RETURN_ADDR {
1353
self.done_return_to_host()
1354
} else {
1355
self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
1356
ControlFlow::Continue(())
1357
}
1358
}
1359
1360
fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1361
let return_addr = self.pc.as_ptr();
1362
self.state.lr = return_addr.as_ptr();
1363
self.pc_rel_jump::<crate::Call>(offset)
1364
}
1365
1366
fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1367
let return_addr = self.pc.as_ptr();
1368
self.state.lr = return_addr.as_ptr();
1369
self.state[XReg::x0] = self.state[arg1];
1370
self.pc_rel_jump::<crate::Call1>(offset)
1371
}
1372
1373
fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1374
let return_addr = self.pc.as_ptr();
1375
self.state.lr = return_addr.as_ptr();
1376
let (x0, x1) = (self.state[arg1], self.state[arg2]);
1377
self.state[XReg::x0] = x0;
1378
self.state[XReg::x1] = x1;
1379
self.pc_rel_jump::<crate::Call2>(offset)
1380
}
1381
1382
fn call3(
1383
&mut self,
1384
arg1: XReg,
1385
arg2: XReg,
1386
arg3: XReg,
1387
offset: PcRelOffset,
1388
) -> ControlFlow<Done> {
1389
let return_addr = self.pc.as_ptr();
1390
self.state.lr = return_addr.as_ptr();
1391
let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
1392
self.state[XReg::x0] = x0;
1393
self.state[XReg::x1] = x1;
1394
self.state[XReg::x2] = x2;
1395
self.pc_rel_jump::<crate::Call3>(offset)
1396
}
1397
1398
fn call4(
1399
&mut self,
1400
arg1: XReg,
1401
arg2: XReg,
1402
arg3: XReg,
1403
arg4: XReg,
1404
offset: PcRelOffset,
1405
) -> ControlFlow<Done> {
1406
let return_addr = self.pc.as_ptr();
1407
self.state.lr = return_addr.as_ptr();
1408
let (x0, x1, x2, x3) = (
1409
self.state[arg1],
1410
self.state[arg2],
1411
self.state[arg3],
1412
self.state[arg4],
1413
);
1414
self.state[XReg::x0] = x0;
1415
self.state[XReg::x1] = x1;
1416
self.state[XReg::x2] = x2;
1417
self.state[XReg::x3] = x3;
1418
self.pc_rel_jump::<crate::Call4>(offset)
1419
}
1420
1421
fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
1422
let return_addr = self.pc.as_ptr();
1423
self.state.lr = return_addr.as_ptr();
1424
// SAFETY: part of the unsafe contract of the interpreter is that only valid
1425
// bytecode is interpreted, so the jump destination is part of the validity
1426
// of the bytecode itself.
1427
unsafe {
1428
self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
1429
}
1430
ControlFlow::Continue(())
1431
}
1432
1433
fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1434
self.pc_rel_jump::<crate::Jump>(offset)
1435
}
1436
1437
fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
1438
unsafe {
1439
self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
1440
}
1441
ControlFlow::Continue(())
1442
}
1443
1444
fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1445
let cond = self.state[cond].get_u32();
1446
if cond != 0 {
1447
self.pc_rel_jump::<crate::BrIf>(offset)
1448
} else {
1449
ControlFlow::Continue(())
1450
}
1451
}
1452
1453
fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1454
let cond = self.state[cond].get_u32();
1455
if cond == 0 {
1456
self.pc_rel_jump::<crate::BrIfNot>(offset)
1457
} else {
1458
ControlFlow::Continue(())
1459
}
1460
}
1461
1462
fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1463
let a = self.state[a].get_u32();
1464
let b = self.state[b].get_u32();
1465
if a == b {
1466
self.pc_rel_jump::<crate::BrIfXeq32>(offset)
1467
} else {
1468
ControlFlow::Continue(())
1469
}
1470
}
1471
1472
fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1473
let a = self.state[a].get_u32();
1474
let b = self.state[b].get_u32();
1475
if a != b {
1476
self.pc_rel_jump::<crate::BrIfXneq32>(offset)
1477
} else {
1478
ControlFlow::Continue(())
1479
}
1480
}
1481
1482
fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1483
let a = self.state[a].get_i32();
1484
let b = self.state[b].get_i32();
1485
if a < b {
1486
self.pc_rel_jump::<crate::BrIfXslt32>(offset)
1487
} else {
1488
ControlFlow::Continue(())
1489
}
1490
}
1491
1492
fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1493
let a = self.state[a].get_i32();
1494
let b = self.state[b].get_i32();
1495
if a <= b {
1496
self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
1497
} else {
1498
ControlFlow::Continue(())
1499
}
1500
}
1501
1502
fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1503
let a = self.state[a].get_u32();
1504
let b = self.state[b].get_u32();
1505
if a < b {
1506
self.pc_rel_jump::<crate::BrIfXult32>(offset)
1507
} else {
1508
ControlFlow::Continue(())
1509
}
1510
}
1511
1512
fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1513
let a = self.state[a].get_u32();
1514
let b = self.state[b].get_u32();
1515
if a <= b {
1516
self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
1517
} else {
1518
ControlFlow::Continue(())
1519
}
1520
}
1521
1522
fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1523
let a = self.state[a].get_u64();
1524
let b = self.state[b].get_u64();
1525
if a == b {
1526
self.pc_rel_jump::<crate::BrIfXeq64>(offset)
1527
} else {
1528
ControlFlow::Continue(())
1529
}
1530
}
1531
1532
fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1533
let a = self.state[a].get_u64();
1534
let b = self.state[b].get_u64();
1535
if a != b {
1536
self.pc_rel_jump::<crate::BrIfXneq64>(offset)
1537
} else {
1538
ControlFlow::Continue(())
1539
}
1540
}
1541
1542
fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1543
let a = self.state[a].get_i64();
1544
let b = self.state[b].get_i64();
1545
if a < b {
1546
self.pc_rel_jump::<crate::BrIfXslt64>(offset)
1547
} else {
1548
ControlFlow::Continue(())
1549
}
1550
}
1551
1552
fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1553
let a = self.state[a].get_i64();
1554
let b = self.state[b].get_i64();
1555
if a <= b {
1556
self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
1557
} else {
1558
ControlFlow::Continue(())
1559
}
1560
}
1561
1562
fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1563
let a = self.state[a].get_u64();
1564
let b = self.state[b].get_u64();
1565
if a < b {
1566
self.pc_rel_jump::<crate::BrIfXult64>(offset)
1567
} else {
1568
ControlFlow::Continue(())
1569
}
1570
}
1571
1572
fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1573
let a = self.state[a].get_u64();
1574
let b = self.state[b].get_u64();
1575
if a <= b {
1576
self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
1577
} else {
1578
ControlFlow::Continue(())
1579
}
1580
}
1581
1582
br_if_imm! {
1583
fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1584
= BrIfXeq32I8 / == / get_i32;
1585
fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1586
= BrIfXeq32I32 / == / get_i32;
1587
fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1588
= BrIfXneq32I8 / != / get_i32;
1589
fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1590
= BrIfXneq32I32 / != / get_i32;
1591
1592
fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1593
= BrIfXslt32I8 / < / get_i32;
1594
fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1595
= BrIfXslt32I32 / < / get_i32;
1596
fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1597
= BrIfXsgt32I8 / > / get_i32;
1598
fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1599
= BrIfXsgt32I32 / > / get_i32;
1600
fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1601
= BrIfXslteq32I8 / <= / get_i32;
1602
fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1603
= BrIfXslteq32I32 / <= / get_i32;
1604
fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1605
= BrIfXsgteq32I8 / >= / get_i32;
1606
fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1607
= BrIfXsgteq32I32 / >= / get_i32;
1608
1609
fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1610
= BrIfXult32U8 / < / get_u32;
1611
fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1612
= BrIfXult32U32 / < / get_u32;
1613
fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1614
= BrIfXugt32U8 / > / get_u32;
1615
fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1616
= BrIfXugt32U32 / > / get_u32;
1617
fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1618
= BrIfXulteq32U8 / <= / get_u32;
1619
fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1620
= BrIfXulteq32U32 / <= / get_u32;
1621
fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1622
= BrIfXugteq32U8 / >= / get_u32;
1623
fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1624
= BrIfXugteq32U32 / >= / get_u32;
1625
1626
fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1627
= BrIfXeq64I8 / == / get_i64;
1628
fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1629
= BrIfXeq64I32 / == / get_i64;
1630
fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1631
= BrIfXneq64I8 / != / get_i64;
1632
fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1633
= BrIfXneq64I32 / != / get_i64;
1634
1635
fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1636
= BrIfXslt64I8 / < / get_i64;
1637
fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1638
= BrIfXslt64I32 / < / get_i64;
1639
fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1640
= BrIfXsgt64I8 / > / get_i64;
1641
fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1642
= BrIfXsgt64I32 / > / get_i64;
1643
fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1644
= BrIfXslteq64I8 / <= / get_i64;
1645
fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1646
= BrIfXslteq64I32 / <= / get_i64;
1647
fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1648
= BrIfXsgteq64I8 / >= / get_i64;
1649
fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1650
= BrIfXsgteq64I32 / >= / get_i64;
1651
1652
fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1653
= BrIfXult64U8 / < / get_u64;
1654
fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1655
= BrIfXult64U32 / < / get_u64;
1656
fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1657
= BrIfXugt64U8 / > / get_u64;
1658
fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1659
= BrIfXugt64U32 / > / get_u64;
1660
fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1661
= BrIfXulteq64U8 / <= / get_u64;
1662
fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1663
= BrIfXulteq64U32 / <= / get_u64;
1664
fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1665
= BrIfXugteq64U8 / >= / get_u64;
1666
fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1667
= BrIfXugteq64U32 / >= / get_u64;
1668
}
1669
1670
    fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }

    fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
        self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }

    fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
        self.state[dst].set_i64(0);
        ControlFlow::Continue(())
    }

    fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
        self.state[dst].set_i64(1);
        ControlFlow::Continue(())
    }

    fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
        self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }

    fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
        self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }

    fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
        self.state[dst].set_i64(imm);
        ControlFlow::Continue(())
    }

    fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_add(b));
        ControlFlow::Continue(())
    }

    fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xadd32_u32(dst, src1, src2.into())
    }

    fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
        let a = self.state[src1].get_u32();
        self.state[dst].set_u32(a.wrapping_add(src2));
        ControlFlow::Continue(())
    }

    fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a.wrapping_add(b));
        ControlFlow::Continue(())
    }

    fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xadd64_u32(dst, src1, src2.into())
    }

    fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
        let a = self.state[src1].get_u64();
        self.state[dst].set_u64(a.wrapping_add(src2.into()));
        ControlFlow::Continue(())
    }

    fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_u32();
        let b = self.state[src2].get_u32();
        let c = self.state[src3].get_u32();
        self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
        ControlFlow::Continue(())
    }

    fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_u64();
        let b = self.state[src2].get_u64();
        let c = self.state[src3].get_u64();
        self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
        ControlFlow::Continue(())
    }

    fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_sub(b));
        ControlFlow::Continue(())
    }

    fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xsub32_u32(dst, src1, src2.into())
    }

    fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
        let a = self.state[src1].get_u32();
        self.state[dst].set_u32(a.wrapping_sub(src2));
        ControlFlow::Continue(())
    }

    fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a.wrapping_sub(b));
        ControlFlow::Continue(())
    }

    fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xsub64_u32(dst, src1, src2.into())
    }

    fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
        let a = self.state[src1].get_u64();
        self.state[dst].set_u64(a.wrapping_sub(src2.into()));
        ControlFlow::Continue(())
    }

    fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_mul(b));
        ControlFlow::Continue(())
    }

    fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xmul32_s32(dst, src1, src2.into())
    }

    fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
        let a = self.state[src1].get_i32();
        self.state[dst].set_i32(a.wrapping_mul(src2));
        ControlFlow::Continue(())
    }

    fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a.wrapping_mul(b));
        ControlFlow::Continue(())
    }

    fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xmul64_s32(dst, src1, src2.into())
    }

    fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
        let a = self.state[src1].get_i64();
        self.state[dst].set_i64(a.wrapping_mul(src2.into()));
        ControlFlow::Continue(())
    }

    fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_shl(b));
        ControlFlow::Continue(())
    }

    fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64(a.wrapping_shl(b));
        ControlFlow::Continue(())
    }

    fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_u32(a.wrapping_shl(b));
        ControlFlow::Continue(())
    }

    fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_u32(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_i32(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_u64(a.wrapping_shl(b));
        ControlFlow::Continue(())
    }

    fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_u64(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = u32::from(u8::from(operands.src2));
        self.state[operands.dst].set_i64(a.wrapping_shr(b));
        ControlFlow::Continue(())
    }

    fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_i32(a.wrapping_neg());
        ControlFlow::Continue(())
    }

    fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_i64(a.wrapping_neg());
        ControlFlow::Continue(())
    }

    fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_i64();
        self.state[operands.dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_i64();
        self.state[operands.dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_i32();
        self.state[operands.dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_i32();
        self.state[operands.dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn push_frame(&mut self) -> ControlFlow<Done> {
        self.push::<crate::PushFrame, _>(self.state.lr)?;
        self.push::<crate::PushFrame, _>(self.state.fp)?;
        self.state.fp = self.state[XReg::sp].get_ptr();
        ControlFlow::Continue(())
    }
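
    // Frame layout note (sketch, derived from the code below): after the
    // stack pointer is dropped by `amt + 2 * size_of::<usize>()` bytes,
    // `lr` and `fp` sit at the top of the allocation, `fp` is repointed at
    // the saved `fp`, and the registers in `regs` are stored at descending
    // 8-byte offsets below it.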

    #[inline]
    fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
        // Decrement the stack pointer `amt` bytes plus 2 pointers more for
        // fp/lr.
        let ptr_size = size_of::<usize>();
        let full_amt = usize::from(amt) + 2 * ptr_size;
        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
        self.set_sp::<crate::PushFrameSave>(new_sp)?;

        unsafe {
            // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
            // that order, at the top of the allocated area.
            self.store_ne::<_, crate::PushFrameSave>(
                AddrO32 {
                    addr: XReg::sp,
                    offset: (full_amt - 1 * ptr_size) as i32,
                },
                self.state.lr,
            )?;
            self.store_ne::<_, crate::PushFrameSave>(
                AddrO32 {
                    addr: XReg::sp,
                    offset: (full_amt - 2 * ptr_size) as i32,
                },
                self.state.fp,
            )?;

            // Set `fp` to the top of our frame, where `fp` is stored.
            let mut offset = amt as i32;
            self.state.fp = self.state[XReg::sp]
                .get_ptr::<u8>()
                .byte_offset(offset as isize);

            // Next save any registers in `regs` to the stack.
            for reg in regs {
                offset -= 8;
                self.store_ne::<_, crate::PushFrameSave>(
                    AddrO32 {
                        addr: XReg::sp,
                        offset,
                    },
                    self.state[reg].get_u64(),
                )?;
            }
        }
        ControlFlow::Continue(())
    }

    fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
        // Restore all registers in `regs`, followed by the normal `pop_frame`
        // opcode below to restore fp/lr.
        unsafe {
            let mut offset = i32::from(amt);
            for reg in regs {
                offset -= 8;
                let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
                    addr: XReg::sp,
                    offset,
                })?;
                self.state[reg].set_u64(val);
            }
        }
        self.pop_frame()
    }

    fn pop_frame(&mut self) -> ControlFlow<Done> {
        self.set_sp_unchecked(self.state.fp);
        let fp = self.pop();
        let lr = self.pop();
        self.state.fp = fp;
        self.state.lr = lr;
        ControlFlow::Continue(())
    }
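
    // Note: `br_table32` is followed in the bytecode by `amt` 4-byte
    // `PcRelOffset` entries. The index is clamped to `amt - 1`, `pc` is
    // advanced to the selected entry, and the branch target is that entry's
    // offset applied relative to the entry itself.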

    fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
        let idx = self.state[idx].get_u32().min(amt - 1) as isize;
        // SAFETY: part of the contract of the interpreter is only dealing with
        // valid bytecode, so this offset should be safe.
        self.pc = unsafe { self.pc.offset(idx * 4) };

        // Decode the `PcRelOffset` without tampering with `self.pc` as the
        // jump is relative to `self.pc`.
        let mut tmp = self.pc;
        let Ok(rel) = PcRelOffset::decode(&mut tmp);
        let offset = isize::try_from(i32::from(rel)).unwrap();
        self.pc = unsafe { self.pc.offset(offset) };
        ControlFlow::Continue(())
    }

    fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
        let amt = usize::try_from(amt).unwrap();
        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
        self.set_sp::<crate::StackAlloc32>(new_sp)?;
        ControlFlow::Continue(())
    }

    fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
        let amt = usize::try_from(amt).unwrap();
        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
        self.set_sp_unchecked(new_sp);
        ControlFlow::Continue(())
    }

    fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let src = self.state[src].get_u64() as u8;
        self.state[dst].set_u64(src.into());
        ControlFlow::Continue(())
    }

    fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let src = self.state[src].get_u64() as u16;
        self.state[dst].set_u64(src.into());
        ControlFlow::Continue(())
    }

    fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let src = self.state[src].get_u64() as u32;
        self.state[dst].set_u64(src.into());
        ControlFlow::Continue(())
    }

    fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let src = self.state[src].get_i64() as i8;
        self.state[dst].set_i64(src.into());
        ControlFlow::Continue(())
    }

    fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let src = self.state[src].get_i64() as i16;
        self.state[dst].set_i64(src.into());
        ControlFlow::Continue(())
    }

    fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let src = self.state[src].get_i64() as i32;
        self.state[dst].set_i64(src.into());
        ControlFlow::Continue(())
    }

    fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_i32();
        match a.checked_div(b) {
            Some(result) => {
                self.state[operands.dst].set_i32(result);
                ControlFlow::Continue(())
            }
            None => {
                let kind = if b == 0 {
                    TrapKind::DivideByZero
                } else {
                    TrapKind::IntegerOverflow
                };
                self.done_trap_kind::<crate::XDiv32S>(Some(kind))
            }
        }
    }

    fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_i64();
        match a.checked_div(b) {
            Some(result) => {
                self.state[operands.dst].set_i64(result);
                ControlFlow::Continue(())
            }
            None => {
                let kind = if b == 0 {
                    TrapKind::DivideByZero
                } else {
                    TrapKind::IntegerOverflow
                };
                self.done_trap_kind::<crate::XDiv64S>(Some(kind))
            }
        }
    }

    fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        match a.checked_div(b) {
            Some(result) => {
                self.state[operands.dst].set_u32(result);
                ControlFlow::Continue(())
            }
            None => self.done_trap_kind::<crate::XDiv32U>(Some(TrapKind::DivideByZero)),
        }
    }

    fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        match a.checked_div(b) {
            Some(result) => {
                self.state[operands.dst].set_u64(result);
                ControlFlow::Continue(())
            }
            None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
        }
    }
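
    // Note: signed remainder cannot overflow here. `iN::MIN % -1` is
    // explicitly defined to produce 0 below (matching Wasm's `rem_s`
    // semantics), so only a zero divisor traps.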

    fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_i32();
        let result = if a == i32::MIN && b == -1 {
            Some(0)
        } else {
            a.checked_rem(b)
        };
        match result {
            Some(result) => {
                self.state[operands.dst].set_i32(result);
                ControlFlow::Continue(())
            }
            None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
        }
    }

    fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_i64();
        let result = if a == i64::MIN && b == -1 {
            Some(0)
        } else {
            a.checked_rem(b)
        };
        match result {
            Some(result) => {
                self.state[operands.dst].set_i64(result);
                ControlFlow::Continue(())
            }
            None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
        }
    }

    fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        match a.checked_rem(b) {
            Some(result) => {
                self.state[operands.dst].set_u32(result);
                ControlFlow::Continue(())
            }
            None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
        }
    }

    fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        match a.checked_rem(b) {
            Some(result) => {
                self.state[operands.dst].set_u64(result);
                ControlFlow::Continue(())
            }
            None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
        }
    }

    fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a & b);
        ControlFlow::Continue(())
    }

    fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xband32_s32(dst, src1, src2.into())
    }

    fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
        let a = self.state[src1].get_i32();
        self.state[dst].set_i32(a & src2);
        ControlFlow::Continue(())
    }

    fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a & b);
        ControlFlow::Continue(())
    }

    fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xband64_s32(dst, src1, src2.into())
    }

    fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
        let a = self.state[src1].get_i64();
        self.state[dst].set_i64(a & i64::from(src2));
        ControlFlow::Continue(())
    }

    fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a | b);
        ControlFlow::Continue(())
    }

    fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xbor32_s32(dst, src1, src2.into())
    }

    fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
        let a = self.state[src1].get_i32();
        self.state[dst].set_i32(a | src2);
        ControlFlow::Continue(())
    }

    fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a | b);
        ControlFlow::Continue(())
    }

    fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xbor64_s32(dst, src1, src2.into())
    }

    fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
        let a = self.state[src1].get_i64();
        self.state[dst].set_i64(a | i64::from(src2));
        ControlFlow::Continue(())
    }

    fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a ^ b);
        ControlFlow::Continue(())
    }

    fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xbxor32_s32(dst, src1, src2.into())
    }

    fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
        let a = self.state[src1].get_i32();
        self.state[dst].set_i32(a ^ src2);
        ControlFlow::Continue(())
    }

    fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a ^ b);
        ControlFlow::Continue(())
    }

    fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
        self.xbxor64_s32(dst, src1, src2.into())
    }

    fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
        let a = self.state[src1].get_i64();
        self.state[dst].set_i64(a ^ i64::from(src2));
        ControlFlow::Continue(())
    }

    fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_u32(!a);
        ControlFlow::Continue(())
    }

    fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_u64(!a);
        ControlFlow::Continue(())
    }

    fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.min(b));
        ControlFlow::Continue(())
    }

    fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_i32();
        self.state[operands.dst].set_i32(a.min(b));
        ControlFlow::Continue(())
    }

    fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.max(b));
        ControlFlow::Continue(())
    }

    fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32();
        let b = self.state[operands.src2].get_i32();
        self.state[operands.dst].set_i32(a.max(b));
        ControlFlow::Continue(())
    }

    fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a.min(b));
        ControlFlow::Continue(())
    }

    fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_i64();
        self.state[operands.dst].set_i64(a.min(b));
        ControlFlow::Continue(())
    }

    fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        self.state[operands.dst].set_u64(a.max(b));
        ControlFlow::Continue(())
    }

    fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_i64();
        self.state[operands.dst].set_i64(a.max(b));
        ControlFlow::Continue(())
    }

    fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_u32(a.trailing_zeros());
        ControlFlow::Continue(())
    }

    fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_u64(a.trailing_zeros().into());
        ControlFlow::Continue(())
    }

    fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_u32(a.leading_zeros());
        ControlFlow::Continue(())
    }

    fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_u64(a.leading_zeros().into());
        ControlFlow::Continue(())
    }

    fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_u32(a.count_ones());
        ControlFlow::Continue(())
    }

    fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_u64(a.count_ones().into());
        ControlFlow::Continue(())
    }

    fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.rotate_left(b));
        ControlFlow::Continue(())
    }

    fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64(a.rotate_left(b));
        ControlFlow::Continue(())
    }

    fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32(a.rotate_right(b));
        ControlFlow::Continue(())
    }

    fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64(a.rotate_right(b));
        ControlFlow::Continue(())
    }

    fn xselect32(
        &mut self,
        dst: XReg,
        cond: XReg,
        if_nonzero: XReg,
        if_zero: XReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_u32()
        } else {
            self.state[if_zero].get_u32()
        };
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    fn xselect64(
        &mut self,
        dst: XReg,
        cond: XReg,
        if_nonzero: XReg,
        if_zero: XReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_u64()
        } else {
            self.state[if_zero].get_u64()
        };
        self.state[dst].set_u64(result);
        ControlFlow::Continue(())
    }

    fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_i32(a.wrapping_abs());
        ControlFlow::Continue(())
    }

    fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_i64(a.wrapping_abs());
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes

    fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
        self.state[dst].set_u32(result.into());
        ControlFlow::Continue(())
    }

    fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
        self.state[dst].set_i32(result.into());
        ControlFlow::Continue(())
    }

    fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
        self.state[dst].set_u32(u16::from_le(result).into());
        ControlFlow::Continue(())
    }

    fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
        self.state[dst].set_i32(i16::from_le(result).into());
        ControlFlow::Continue(())
    }

    fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
        self.state[dst].set_i32(i32::from_le(result));
        ControlFlow::Continue(())
    }

    fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
        self.state[dst].set_i64(i64::from_le(result));
        ControlFlow::Continue(())
    }

    fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u8;
        unsafe {
            self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
        }
        ControlFlow::Continue(())
    }

    fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u16;
        unsafe {
            self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32();
        unsafe {
            self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u64();
        unsafe {
            self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32 addressing modes

    fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
        self.state[dst].set_u32(result.into());
        ControlFlow::Continue(())
    }

    fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
        self.state[dst].set_i32(result.into());
        ControlFlow::Continue(())
    }

    fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
        self.state[dst].set_u32(u16::from_le(result).into());
        ControlFlow::Continue(())
    }

    fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
        self.state[dst].set_i32(i16::from_le(result).into());
        ControlFlow::Continue(())
    }

    fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
        self.state[dst].set_i32(i32::from_le(result));
        ControlFlow::Continue(())
    }

    fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
        self.state[dst].set_i64(i64::from_le(result));
        ControlFlow::Continue(())
    }

    fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u8;
        unsafe {
            self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
        }
        ControlFlow::Continue(())
    }

    fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u16;
        unsafe {
            self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32();
        unsafe {
            self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u64();
        unsafe {
            self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // z addressing modes

    fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
        self.state[dst].set_u32(result.into());
        ControlFlow::Continue(())
    }

    fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
        self.state[dst].set_i32(result.into());
        ControlFlow::Continue(())
    }

    fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
        self.state[dst].set_u32(u16::from_le(result).into());
        ControlFlow::Continue(())
    }

    fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
        self.state[dst].set_i32(i16::from_le(result).into());
        ControlFlow::Continue(())
    }

    fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
        self.state[dst].set_i32(i32::from_le(result));
        ControlFlow::Continue(())
    }

    fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
        self.state[dst].set_i64(i64::from_le(result));
        ControlFlow::Continue(())
    }

    fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u8;
        unsafe {
            self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
        }
        ControlFlow::Continue(())
    }

    fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u16;
        unsafe {
            self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32();
        unsafe {
            self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u64();
        unsafe {
            self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32bne addressing modes

    fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
        self.state[dst].set_u32(result.into());
        ControlFlow::Continue(())
    }

    fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
        self.state[dst].set_i32(result.into());
        ControlFlow::Continue(())
    }

    fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
        self.state[dst].set_u32(u16::from_le(result).into());
        ControlFlow::Continue(())
    }

    fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
        self.state[dst].set_i32(i16::from_le(result).into());
        ControlFlow::Continue(())
    }

    fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
        self.state[dst].set_i32(i32::from_le(result));
        ControlFlow::Continue(())
    }

    fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
        self.state[dst].set_i64(i64::from_le(result));
        ControlFlow::Continue(())
    }

    fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u8;
        unsafe {
            self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
        }
        ControlFlow::Continue(())
    }

    fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u16;
        unsafe {
            self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32();
        unsafe {
            self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u64();
        unsafe {
            self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }
}

impl ExtendedOpVisitor for Interpreter<'_> {
    fn trap(&mut self) -> ControlFlow<Done> {
        self.done_trap::<crate::Trap>()
    }

    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
        self.done_call_indirect_host(id)
    }

    fn xpcadd(&mut self, dst: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let pc = self.pc_rel::<crate::Xpcadd>(offset);
        self.state[dst].set_ptr(pc.as_ptr());
        ControlFlow::Continue(())
    }

    fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let src = self.state[src].get_u32();
        self.state[dst].set_u32(src.swap_bytes());
        ControlFlow::Continue(())
    }

    fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let src = self.state[src].get_u64();
        self.state[dst].set_u64(src.swap_bytes());
        ControlFlow::Continue(())
    }
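
    // `xbmask{32,64}` materialize a boolean mask: a zero source stays zero
    // and any nonzero source becomes all ones (-1).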

    fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
        let a = self.state[src].get_u32();
        if a == 0 {
            self.state[dst].set_u32(0);
        } else {
            self.state[dst].set_i32(-1);
        }
        ControlFlow::Continue(())
    }

    fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
        let a = self.state[src].get_u64();
        if a == 0 {
            self.state[dst].set_u64(0);
        } else {
            self.state[dst].set_i64(-1);
        }
        ControlFlow::Continue(())
    }

    fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32();
        let b = self.state[operands.src2].get_u32();
        match a.checked_add(b) {
            Some(c) => {
                self.state[operands.dst].set_u32(c);
                ControlFlow::Continue(())
            }
            None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
        }
    }

    fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        match a.checked_add(b) {
            Some(c) => {
                self.state[operands.dst].set_u64(c);
                ControlFlow::Continue(())
            }
            None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
        }
    }
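
    // The `mulhi` ops widen both operands to 128 bits and keep only the
    // upper 64 bits of the product.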

    fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64();
        let b = self.state[operands.src2].get_i64();
        let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
        self.state[operands.dst].set_i64(result);
        ControlFlow::Continue(())
    }

    fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64();
        let b = self.state[operands.src2].get_u64();
        let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
        self.state[operands.dst].set_u64(result);
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes for big-endian X-registers

    fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
        self.state[dst].set_u32(u16::from_be(result).into());
        ControlFlow::Continue(())
    }

    fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
        self.state[dst].set_i32(i16::from_be(result).into());
        ControlFlow::Continue(())
    }

    fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
        self.state[dst].set_i32(i32::from_be(result));
        ControlFlow::Continue(())
    }

    fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
        self.state[dst].set_i64(i64::from_be(result));
        ControlFlow::Continue(())
    }

    fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u16;
        unsafe {
            self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32();
        unsafe {
            self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u64();
        unsafe {
            self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes for little-endian F-registers

    fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes for big-endian F-registers

    fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
        ControlFlow::Continue(())
    }

    fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
        ControlFlow::Continue(())
    }

    fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // z addressing modes for little-endian F-registers

    fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32 addressing modes for little-endian F-registers

    fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // z addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32 addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }
fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
3153
let fp = self.state.fp;
3154
self.state[dst].set_ptr(fp);
3155
ControlFlow::Continue(())
3156
}
3157
3158
fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
3159
let lr = self.state.lr;
3160
self.state[dst].set_ptr(lr);
3161
ControlFlow::Continue(())
3162
}
3163
3164
fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3165
let val = self.state[src];
3166
self.state[dst] = val;
3167
ControlFlow::Continue(())
3168
}
3169
3170
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3171
fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3172
let val = self.state[src];
3173
self.state[dst] = val;
3174
ControlFlow::Continue(())
3175
}
3176
3177
fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
3178
self.state[dst].set_f32(f32::from_bits(bits));
3179
ControlFlow::Continue(())
3180
}
3181
3182
fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
3183
self.state[dst].set_f64(f64::from_bits(bits));
3184
ControlFlow::Continue(())
3185
}
3186
3187
fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3188
let val = self.state[src].get_f32();
3189
self.state[dst].set_u32(val.to_bits());
3190
ControlFlow::Continue(())
3191
}
3192
3193
fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3194
let val = self.state[src].get_f64();
3195
self.state[dst].set_u64(val.to_bits());
3196
ControlFlow::Continue(())
3197
}
3198
3199
fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3200
let val = self.state[src].get_u32();
3201
self.state[dst].set_f32(f32::from_bits(val));
3202
ControlFlow::Continue(())
3203
}
3204
3205
fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3206
let val = self.state[src].get_u64();
3207
self.state[dst].set_f64(f64::from_bits(val));
3208
ControlFlow::Continue(())
3209
}
3210
3211
fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3212
let a = self.state[src1].get_f32();
3213
let b = self.state[src2].get_f32();
3214
self.state[dst].set_u32(u32::from(a == b));
3215
ControlFlow::Continue(())
3216
}
3217
3218
fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3219
let a = self.state[src1].get_f32();
3220
let b = self.state[src2].get_f32();
3221
self.state[dst].set_u32(u32::from(a != b));
3222
ControlFlow::Continue(())
3223
}
3224
3225
fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3226
let a = self.state[src1].get_f32();
3227
let b = self.state[src2].get_f32();
3228
self.state[dst].set_u32(u32::from(a < b));
3229
ControlFlow::Continue(())
3230
}
3231
3232
    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn fselect32(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_f32()
        } else {
            self.state[if_zero].get_f32()
        };
        self.state[dst].set_f32(result);
        ControlFlow::Continue(())
    }

    fn fselect64(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_f64()
        } else {
            self.state[if_zero].get_f64()
        };
        self.state[dst].set_f64(result);
        ControlFlow::Continue(())
    }

    fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }
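    // Checked float-to-integer conversions: `check_xnn_from_f32`/`check_xnn_from_f64`
    // validate the input against the destination type's bounds and propagate a trap via
    // `?` for NaN or out-of-range values, so the `as` cast below only sees in-range inputs.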
    fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X32FromF32S>(a, f32_cvt_to_int_bounds(true, 32))?;
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X32FromF32U>(a, f32_cvt_to_int_bounds(false, 32))?;
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X64FromF32S>(a, f32_cvt_to_int_bounds(true, 64))?;
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X64FromF32U>(a, f32_cvt_to_int_bounds(false, 64))?;
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X32FromF64S>(a, f64_cvt_to_int_bounds(true, 32))?;
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X32FromF64U>(a, f64_cvt_to_int_bounds(false, 32))?;
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X64FromF64S>(a, f64_cvt_to_int_bounds(true, 64))?;
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X64FromF64U>(a, f64_cvt_to_int_bounds(false, 64))?;
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }
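    // Saturating conversions: these rely on Rust's saturating float-to-integer `as`
    // casts (out-of-range values clamp to the destination's min/max and NaN becomes 0),
    // so no explicit bounds check is needed.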
    fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f64(a.into());
        ControlFlow::Continue(())
    }

    fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_copysign(b));
        ControlFlow::Continue(())
    }

    fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_copysign(b));
        ControlFlow::Continue(())
    }

    fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a + b);
        ControlFlow::Continue(())
    }

    fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a - b);
        ControlFlow::Continue(())
    }
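    // Note: the `v*` SIMD handlers below are wrapped in
    // `#[interp_disable_if_cfg(pulley_disable_interp_simd)]`, which disables them when
    // the `pulley_disable_interp_simd` cfg is set; the scalar handlers are always built.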
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a - b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a * b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a * b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a / b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut result = [0.0f32; 4];

        for i in 0..4 {
            result[i] = a[i] / b[i];
        }

        self.state[operands.dst].set_f32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut result = [0.0f64; 2];

        for i in 0..2 {
            result[i] = a[i] / b[i];
        }

        self.state[operands.dst].set_f64x2(result);
        ControlFlow::Continue(())
    }

    fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_maximum(b));
        ControlFlow::Continue(())
    }

    fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_minimum(b));
        ControlFlow::Continue(())
    }

    fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_trunc());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_trunc();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_trunc();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_floor());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_floor();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_floor();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_ceil());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_ceil();
        }
        self.state[dst].set_f32x4(a);

        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_ceil();
        }
        self.state[dst].set_f64x2(a);

        ControlFlow::Continue(())
    }

    fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_nearest());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_nearest();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_nearest();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_sqrt());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_sqrt();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_sqrt();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(-a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = -*elem;
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_abs());
        ControlFlow::Continue(())
    }
    fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a + b);
        ControlFlow::Continue(())
    }

    fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a - b);
        ControlFlow::Continue(())
    }

    fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a * b);
        ControlFlow::Continue(())
    }

    fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a / b);
        ControlFlow::Continue(())
    }

    fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_maximum(b));
        ControlFlow::Continue(())
    }

    fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_minimum(b));
        ControlFlow::Continue(())
    }

    fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_trunc());
        ControlFlow::Continue(())
    }

    fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_floor());
        ControlFlow::Continue(())
    }

    fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_ceil());
        ControlFlow::Continue(())
    }

    fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_nearest());
        ControlFlow::Continue(())
    }

    fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_sqrt());
        ControlFlow::Continue(())
    }

    fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(-a);
        ControlFlow::Continue(())
    }

    fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_abs());
        ControlFlow::Continue(())
    }
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a += b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a += b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }
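    // Pairwise adds: adjacent lane pairs from `src1` are summed into the low half of the
    // result and adjacent pairs from `src2` into the high half, using wrapping arithmetic.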
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0i16; 8];
        let half = result.len() / 2;
        for i in 0..half {
            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
        }
        self.state[operands.dst].set_i16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0i32; 4];
        result[0] = a[0].wrapping_add(a[1]);
        result[1] = a[2].wrapping_add(a[3]);
        result[2] = b[0].wrapping_add(b[1]);
        result[3] = b[2].wrapping_add(b[3]);
        self.state[operands.dst].set_i32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
        self.state[dst].set_u128(val);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32() as u8;
        self.state[dst].set_u8x16([val; 16]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32() as u16;
        self.state[dst].set_u16x8([val; 8]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32();
        self.state[dst].set_u32x4([val; 4]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u64();
        self.state[dst].set_u64x2([val; 2]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        self.state[dst].set_f32x4([val; 4]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        self.state[dst].set_f64x2([val; 2]);
        ControlFlow::Continue(())
    }
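    // Widening loads: read a half-width vector from memory with `load_ne` (plus a
    // `from_le` conversion for multi-byte lanes) and sign- or zero-extend each lane into
    // the full-width destination.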
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
        self.state[dst].set_i16x8(val.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
        self.state[dst].set_u16x8(val.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
        self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
        self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
        self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
        self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a & b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a | b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a ^ b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u128();
        self.state[dst].set_u128(!a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
        let c = self.state[c].get_u128();
        let x = self.state[x].get_u128();
        let y = self.state[y].get_u128();
        self.state[dst].set_u128((c & x) | (!c & y));
        ControlFlow::Continue(())
    }
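    // Bitmask extraction: gather the top (sign) bit of each lane into the low bits of an
    // `x` register, with lane 0 ending up in bit 0.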
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 7) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 15) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= *item >> 31;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 63) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_f32x4(a.map(|i| i as f32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        self.state[dst].set_f32x4(a.map(|i| i as f32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_f64x2(a.map(|i| i as f64));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        self.state[dst].set_f64x2(a.map(|i| i as f64));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_i32x4(a.map(|f| f as i32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_u32x4(a.map(|f| f as u32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_i64x2(a.map(|f| f as i64));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_u64x2(a.map(|f| f as u64));
        ControlFlow::Continue(())
    }
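    // Widening conversions: `first_chunk`/`last_chunk` pick the low or high half of the
    // source lanes, which are then sign- or zero-extended into the wider destination lanes.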
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i8x16().first_chunk().unwrap();
        self.state[dst].set_i16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u8x16().first_chunk().unwrap();
        self.state[dst].set_u16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i16x8().first_chunk().unwrap();
        self.state[dst].set_i32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u16x8().first_chunk().unwrap();
        self.state[dst].set_u32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i32x4().first_chunk().unwrap();
        self.state[dst].set_i64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u32x4().first_chunk().unwrap();
        self.state[dst].set_u64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i8x16().last_chunk().unwrap();
        self.state[dst].set_i16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u8x16().last_chunk().unwrap();
        self.state[dst].set_u16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i16x8().last_chunk().unwrap();
        self.state[dst].set_i32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u16x8().last_chunk().unwrap();
        self.state[dst].set_u32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i32x4().last_chunk().unwrap();
        self.state[dst].set_i64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u32x4().last_chunk().unwrap();
        self.state[dst].set_u64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }
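    // Saturating narrows: lanes from `src1` fill the low half of the result and lanes
    // from `src2` the high half; out-of-range values clamp to the destination's min/max
    // (negative inputs clamp to zero in the unsigned variants).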
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0; 16];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
        }
        self.state[operands.dst].set_i8x16(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0; 16];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
        }
        self.state[operands.dst].set_u8x16(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0; 8];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
        }
        self.state[operands.dst].set_i16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0; 8];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
        }
        self.state[operands.dst].set_u16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut result = [0; 4];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
        }
        self.state[operands.dst].set_i32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut result = [0; 4];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
        }
        self.state[operands.dst].set_u32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut result = [0; 4];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i).try_into().unwrap_or(u32::MAX);
        }
        self.state[operands.dst].set_u32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
        ControlFlow::Continue(())
    }
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a - b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a * b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }
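    // Q15 rounding, saturating multiply: compute `(a * b + (1 << 14)) >> 15` in 32-bit
    // arithmetic, then clamp the result back into the `i16` range.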
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        const MIN: i32 = i16::MIN as i32;
        const MAX: i32 = i16::MAX as i32;
        for (a, b) in a.iter_mut().zip(b) {
            let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
            *a = r.clamp(MIN, MAX) as i16;
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
        ControlFlow::Continue(())
    }
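    // Lane extracts use `get_unchecked`: the `lane` immediate is assumed to always be in
    // bounds for the vector shape, so no runtime bounds check is performed here.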
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(u32::from(a));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(u32::from(a));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u64(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
        self.state[dst].set_f32(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
        self.state[dst].set_f64(a);
        ControlFlow::Continue(())
    }
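    // Lane inserts mirror the extracts above: copy the source vector, overwrite a single
    // lane (again assuming the `lane` immediate is in bounds), and write the result back.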
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx8(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u32() as u8;
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx16(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u32() as u16;
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx32(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx64(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertf32(
        &mut self,
        operands: BinaryOperands<VReg, VReg, FReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertf64(
        &mut self,
        operands: BinaryOperands<VReg, VReg, FReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }
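    // Lane-wise comparisons: each result lane is all ones (e.g. `u8::MAX`) when the
    // predicate holds and all zeros otherwise; `vslt*`/`vslteq*` compare signed lanes,
    // `vult*`/`vulteq*` unsigned.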
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4840
fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4841
let a = self.state[operands.src1].get_u8x16();
4842
let b = self.state[operands.src2].get_u8x16();
4843
let mut c = [0; 16];
4844
for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4845
*c = if a == b { u8::MAX } else { 0 };
4846
}
4847
self.state[operands.dst].set_u8x16(c);
4848
ControlFlow::Continue(())
4849
}
4850
4851
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4852
fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4853
let a = self.state[operands.src1].get_u8x16();
4854
let b = self.state[operands.src2].get_u8x16();
4855
let mut c = [0; 16];
4856
for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4857
*c = if a != b { u8::MAX } else { 0 };
4858
}
4859
self.state[operands.dst].set_u8x16(c);
4860
ControlFlow::Continue(())
4861
}
4862
4863
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4864
fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4865
let a = self.state[operands.src1].get_i8x16();
4866
let b = self.state[operands.src2].get_i8x16();
4867
let mut c = [0; 16];
4868
for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4869
*c = if a < b { u8::MAX } else { 0 };
4870
}
4871
self.state[operands.dst].set_u8x16(c);
4872
ControlFlow::Continue(())
4873
}
4874
4875
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4876
fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4877
let a = self.state[operands.src1].get_i8x16();
4878
let b = self.state[operands.src2].get_i8x16();
4879
let mut c = [0; 16];
4880
for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4881
*c = if a <= b { u8::MAX } else { 0 };
4882
}
4883
self.state[operands.dst].set_u8x16(c);
4884
ControlFlow::Continue(())
4885
}
4886
4887
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4888
fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4889
let a = self.state[operands.src1].get_u8x16();
4890
let b = self.state[operands.src2].get_u8x16();
4891
let mut c = [0; 16];
4892
for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4893
*c = if a < b { u8::MAX } else { 0 };
4894
}
4895
self.state[operands.dst].set_u8x16(c);
4896
ControlFlow::Continue(())
4897
}
4898
4899
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4900
fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4901
let a = self.state[operands.src1].get_u8x16();
4902
let b = self.state[operands.src2].get_u8x16();
4903
let mut c = [0; 16];
4904
for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4905
*c = if a <= b { u8::MAX } else { 0 };
4906
}
4907
self.state[operands.dst].set_u8x16(c);
4908
ControlFlow::Continue(())
4909
}
4910
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

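    // Lane-wise negation. The integer variants wrap on overflow (negating
    // the minimum value yields the value itself), and the floating-point
    // variant simply flips the sign of each lane.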
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i8x16();
        self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i16x8();
        self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f64x2(a.map(|i| -i));
        ControlFlow::Continue(())
    }

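    // Lane-wise integer minimum and maximum in signed (`_s`) and unsigned
    // (`_u`) flavors for 8-, 16-, and 32-bit lanes.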
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

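    // Lane-wise absolute value. The integer variants use wrapping semantics
    // (the minimum value maps to itself), while the float variants go
    // through `wasm_abs`, which only clears the sign bit of each lane.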
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i8x16();
        self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i16x8();
        self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
        ControlFlow::Continue(())
    }

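    // Lane-wise floating-point `maximum`/`minimum`. The `wasm_maximum` and
    // `wasm_minimum` helpers follow Wasm semantics, where a NaN in either
    // input propagates to the result, unlike plain `f32::max`/`f32::min`.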
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_maximum(*b);
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_maximum(*b);
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_minimum(*b);
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_minimum(*b);
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

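    // Byte shuffling. `vshuffle` selects each output byte from the 32-byte
    // concatenation of the two sources using an immediate mask, while
    // `vswizzlei8x16` indexes the first source with lane indices taken from
    // the second and produces zero for any out-of-range index.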
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
        let a = self.state[src1].get_u8x16();
        let b = self.state[src2].get_u8x16();
        let result = mask.to_le_bytes().map(|m| {
            if m < 16 {
                a[m as usize]
            } else {
                b[m as usize - 16]
            }
        });
        self.state[dst].set_u8x16(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let src1 = self.state[operands.src1].get_i8x16();
        let src2 = self.state[operands.src2].get_i8x16();
        let mut dst = [0i8; 16];
        for (i, &idx) in src2.iter().enumerate() {
            if (idx as usize) < 16 {
                dst[i] = src1[idx as usize];
            } else {
                dst[i] = 0;
            }
        }
        self.state[operands.dst].set_i8x16(dst);
        ControlFlow::Continue(())
    }

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
5412
fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5413
let mut a = self.state[operands.src1].get_u8x16();
5414
let b = self.state[operands.src2].get_u8x16();
5415
for (a, b) in a.iter_mut().zip(&b) {
5416
// use wider precision to avoid overflow
5417
*a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
5418
}
5419
self.state[operands.dst].set_u8x16(a);
5420
ControlFlow::Continue(())
5421
}
5422
5423
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
5424
fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5425
let mut a = self.state[operands.src1].get_u16x8();
5426
let b = self.state[operands.src2].get_u16x8();
5427
for (a, b) in a.iter_mut().zip(&b) {
5428
// use wider precision to avoid overflow
5429
*a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
5430
}
5431
self.state[operands.dst].set_u16x8(a);
5432
ControlFlow::Continue(())
5433
}
5434
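    // Lane-wise floating-point comparisons. Like the integer comparisons
    // above, these produce all-ones or all-zero masks, but they follow IEEE
    // semantics: any comparison involving NaN is false, so `vneqf*` yields
    // all ones for NaN lanes.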
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

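    // Lane-wise multiply-add: each destination lane becomes `a * b + c`.
    // Whether the multiply and add are fused into a single rounding step is
    // left to the `wasm_mul_add` helper and may depend on the platform.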
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut a = self.state[a].get_f32x4();
        let b = self.state[b].get_f32x4();
        let c = self.state[c].get_f32x4();
        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
            *a = a.wasm_mul_add(b, c);
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut a = self.state[a].get_f64x2();
        let b = self.state[b].get_f64x2();
        let c = self.state[c].get_f64x2();
        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
            *a = a.wasm_mul_add(b, c);
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

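    // Whole-register select: the destination is a copy of one of the two
    // vector sources, chosen by whether the scalar condition register is
    // nonzero. This is not a lane-wise bitselect.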
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vselect(
        &mut self,
        dst: VReg,
        cond: XReg,
        if_nonzero: VReg,
        if_zero: VReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero]
        } else {
            self.state[if_zero]
        };
        self.state[dst] = result;
        ControlFlow::Continue(())
    }

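    // 128-bit arithmetic on pairs of `x` registers: the low and high halves
    // of each operand live in separate registers and are combined via
    // `get_i128`/`set_i128`. Addition and subtraction wrap on overflow, and
    // the widening multiplies produce the full 128-bit product of two
    // 64-bit operands.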
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xadd128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.get_i128(lhs_lo, lhs_hi);
        let rhs = self.get_i128(rhs_lo, rhs_hi);
        let result = lhs.wrapping_add(rhs);
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xsub128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.get_i128(lhs_lo, lhs_hi);
        let rhs = self.get_i128(rhs_lo, rhs_hi);
        let result = lhs.wrapping_sub(rhs);
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xwidemul64_s(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.state[lhs].get_i64();
        let rhs = self.state[rhs].get_i64();
        let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xwidemul64_u(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.state[lhs].get_u64();
        let rhs = self.state[rhs].get_u64();
        let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
        self.set_i128(dst_lo, dst_hi, result as i128);
        ControlFlow::Continue(())
    }
}
