GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/pulley/src/interp.rs
1
//! Interpretation of pulley bytecode.
2
3
use crate::decode::*;
4
use crate::encode::Encode;
5
use crate::imms::*;
6
use crate::profile::{ExecutingPc, ExecutingPcRef};
7
use crate::regs::*;
8
use alloc::string::ToString;
9
use alloc::vec::Vec;
10
use core::fmt;
11
use core::mem;
12
use core::ops::ControlFlow;
13
use core::ops::{Index, IndexMut};
14
use core::ptr::NonNull;
15
use pulley_macros::interp_disable_if_cfg;
16
use wasmtime_math::{WasmFloat, f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};
17
18
mod debug;
19
#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
20
mod match_loop;
21
#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
22
mod tail_loop;
23
24
const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB
25
26
/// A virtual machine for interpreting Pulley bytecode.
27
pub struct Vm {
28
state: MachineState,
29
executing_pc: ExecutingPc,
30
}
31
32
impl Default for Vm {
33
fn default() -> Self {
34
Vm::new()
35
}
36
}
37
38
impl Vm {
39
/// Create a new virtual machine with the default stack size.
40
pub fn new() -> Self {
41
Self::with_stack(DEFAULT_STACK_SIZE)
42
}
43
44
/// Create a new virtual machine with the given stack size.
45
pub fn with_stack(stack_size: usize) -> Self {
46
Self {
47
state: MachineState::with_stack(stack_size),
48
executing_pc: ExecutingPc::default(),
49
}
50
}
51
52
/// Get a shared reference to this VM's machine state.
53
pub fn state(&self) -> &MachineState {
54
&self.state
55
}
56
57
/// Get an exclusive reference to this VM's machine state.
58
pub fn state_mut(&mut self) -> &mut MachineState {
59
&mut self.state
60
}
61
62
/// Call a bytecode function.
63
///
64
/// The given `func` must point to the beginning of a valid Pulley bytecode
65
/// function.
66
///
67
/// The given `args` must match the number and type of arguments that
68
/// function expects.
69
///
70
/// The given `rets` must match the function's actual return types.
71
///
72
/// Returns either the resulting values, or the PC at which a trap was
73
/// raised.
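///
/// # Example
///
/// A minimal sketch of the intended calling sequence (illustrative only, not
/// from the original source); `func` is assumed to point at valid Pulley
/// bytecode produced elsewhere:
///
/// ```ignore
/// let mut vm = Vm::new();
/// // Pass one integer argument and ask for one integer result back.
/// let args = [Val::from(42_u64)];
/// match unsafe { vm.call(func, &args, [RegType::XReg]) } {
///     DoneReason::ReturnToHost(mut rets) => {
///         let ret = rets.next().unwrap();
///         // ... use `ret` ...
///     }
///     DoneReason::Trap { pc, kind } => { /* execution trapped at `pc` */ }
///     DoneReason::CallIndirectHost { id, resume } => { /* host call requested */ }
/// }
/// ```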
74
pub unsafe fn call<'a, T>(
75
&'a mut self,
76
func: NonNull<u8>,
77
args: &[Val],
78
rets: T,
79
) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
80
where
81
T: IntoIterator<Item = RegType> + 'a,
82
{
83
unsafe {
84
let lr = self.call_start(args);
85
86
match self.call_run(func) {
87
DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
88
DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
89
DoneReason::CallIndirectHost { id, resume } => {
90
DoneReason::CallIndirectHost { id, resume }
91
}
92
}
93
}
94
}
95
96
/// Performs the initial part of [`Vm::call`] in setting up the `args`
97
/// provided in registers according to Pulley's ABI.
98
///
99
/// # Return
100
///
101
/// Returns the old `lr` register value. The current `lr` value is replaced
102
/// with a sentinel that triggers a return to the host when returned-to.
103
///
104
/// # Unsafety
105
///
106
/// All the same unsafety as `call` and additionally, you must
107
/// invoke `call_run` and then `call_end` after calling `call_start`.
108
/// If you don't want to wrangle these invocations, use `call` instead
109
/// of `call_{start,run,end}`.
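///
/// An illustrative sketch of the split sequence (with `func`, `args`, and
/// `rets` set up as they would be for [`Vm::call`]):
///
/// ```ignore
/// let old_lr = unsafe { vm.call_start(&args) };
/// match unsafe { vm.call_run(func) } {
///     DoneReason::ReturnToHost(()) => {
///         let results: Vec<Val> = unsafe { vm.call_end(old_lr, rets) }.collect();
///         // ... use `results` ...
///     }
///     other => { /* propagate the trap or host call */ }
/// }
/// ```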
110
pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
111
// NB: make sure this method stays in sync with
112
// `PulleyMachineDeps::compute_arg_locs`!
113
114
let mut x_args = (0..16).map(|x| unsafe { XReg::new_unchecked(x) });
115
let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
116
#[cfg(not(pulley_disable_interp_simd))]
117
let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
118
119
for arg in args {
120
match arg {
121
Val::XReg(val) => match x_args.next() {
122
Some(reg) => self.state[reg] = *val,
123
None => todo!("stack slots"),
124
},
125
Val::FReg(val) => match f_args.next() {
126
Some(reg) => self.state[reg] = *val,
127
None => todo!("stack slots"),
128
},
129
#[cfg(not(pulley_disable_interp_simd))]
130
Val::VReg(val) => match v_args.next() {
131
Some(reg) => self.state[reg] = *val,
132
None => todo!("stack slots"),
133
},
134
}
135
}
136
137
mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
138
}
139
140
/// Performs the internal part of [`Vm::call`] where bytecode is actually
141
/// executed.
142
///
143
/// # Unsafety
144
///
145
/// In addition to all the invariants documented for `call`, you
146
/// may only invoke `call_run` after invoking `call_start` to
147
/// initialize this call's arguments.
148
pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
149
self.state.debug_assert_done_reason_none();
150
let interpreter = Interpreter {
151
state: &mut self.state,
152
pc: unsafe { UnsafeBytecodeStream::new(pc) },
153
executing_pc: self.executing_pc.as_ref(),
154
};
155
let done = interpreter.run();
156
self.state.done_decode(done)
157
}
158
159
/// Performs the tail end of [`Vm::call`] by returning the values as
160
/// determined by `rets` according to Pulley's ABI.
161
///
162
/// The `old_ret` value should have been provided from `call_start`
163
/// previously.
164
///
165
/// # Unsafety
166
///
167
/// In addition to the invariants documented for `call`, this may
168
/// only be called after `call_run`.
169
pub unsafe fn call_end<'a>(
170
&'a mut self,
171
old_ret: *mut u8,
172
rets: impl IntoIterator<Item = RegType> + 'a,
173
) -> impl Iterator<Item = Val> + 'a {
174
self.state.lr = old_ret;
175
// NB: make sure this method stays in sync with
176
// `PulleyMachineDeps::compute_arg_locs`!
177
178
let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
179
let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
180
#[cfg(not(pulley_disable_interp_simd))]
181
let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
182
183
rets.into_iter().map(move |ty| match ty {
184
RegType::XReg => match x_rets.next() {
185
Some(reg) => Val::XReg(self.state[reg]),
186
None => todo!("stack slots"),
187
},
188
RegType::FReg => match f_rets.next() {
189
Some(reg) => Val::FReg(self.state[reg]),
190
None => todo!("stack slots"),
191
},
192
#[cfg(not(pulley_disable_interp_simd))]
193
RegType::VReg => match v_rets.next() {
194
Some(reg) => Val::VReg(self.state[reg]),
195
None => todo!("stack slots"),
196
},
197
#[cfg(pulley_disable_interp_simd)]
198
RegType::VReg => panic!("simd support disabled at compile time"),
199
})
200
}
201
202
/// Returns the current `fp` register value.
203
pub fn fp(&self) -> *mut u8 {
204
self.state.fp
205
}
206
207
/// Returns the current `lr` register value.
208
pub fn lr(&self) -> *mut u8 {
209
self.state.lr
210
}
211
212
/// Sets the current `fp` register value.
213
pub unsafe fn set_fp(&mut self, fp: *mut u8) {
214
self.state.fp = fp;
215
}
216
217
/// Sets the current `lr` register value.
218
pub unsafe fn set_lr(&mut self, lr: *mut u8) {
219
self.state.lr = lr;
220
}
221
222
/// Gets a handle to the currently executing program counter for this
223
/// interpreter which can be read from other threads.
224
//
225
// Note that despite this field still existing with `not(feature =
226
// "profile")`, it's hidden from the public API in that scenario as it has no
227
// methods anyway.
228
#[cfg(feature = "profile")]
229
pub fn executing_pc(&self) -> &ExecutingPc {
230
&self.executing_pc
231
}
232
}
233
234
impl Drop for Vm {
235
fn drop(&mut self) {
236
self.executing_pc.set_done();
237
}
238
}
239
240
/// The type of a register in the Pulley machine state.
241
#[derive(Clone, Copy, Debug)]
242
pub enum RegType {
243
/// An `x` register: integers.
244
XReg,
245
246
/// An `f` register: floats.
247
FReg,
248
249
/// A `v` register: vectors.
250
VReg,
251
}
252
253
/// A value that can be stored in a register.
254
#[derive(Clone, Copy, Debug)]
255
pub enum Val {
256
/// An `x` register value: integers.
257
XReg(XRegVal),
258
259
/// An `f` register value: floats.
260
FReg(FRegVal),
261
262
/// A `v` register value: vectors.
263
#[cfg(not(pulley_disable_interp_simd))]
264
VReg(VRegVal),
265
}
266
267
impl fmt::LowerHex for Val {
268
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
269
match self {
270
Val::XReg(v) => fmt::LowerHex::fmt(v, f),
271
Val::FReg(v) => fmt::LowerHex::fmt(v, f),
272
#[cfg(not(pulley_disable_interp_simd))]
273
Val::VReg(v) => fmt::LowerHex::fmt(v, f),
274
}
275
}
276
}
277
278
impl From<XRegVal> for Val {
279
fn from(value: XRegVal) -> Self {
280
Val::XReg(value)
281
}
282
}
283
284
impl From<u64> for Val {
285
fn from(value: u64) -> Self {
286
XRegVal::new_u64(value).into()
287
}
288
}
289
290
impl From<u32> for Val {
291
fn from(value: u32) -> Self {
292
XRegVal::new_u32(value).into()
293
}
294
}
295
296
impl From<i64> for Val {
297
fn from(value: i64) -> Self {
298
XRegVal::new_i64(value).into()
299
}
300
}
301
302
impl From<i32> for Val {
303
fn from(value: i32) -> Self {
304
XRegVal::new_i32(value).into()
305
}
306
}
307
308
impl<T> From<*mut T> for Val {
309
fn from(value: *mut T) -> Self {
310
XRegVal::new_ptr(value).into()
311
}
312
}
313
314
impl From<FRegVal> for Val {
315
fn from(value: FRegVal) -> Self {
316
Val::FReg(value)
317
}
318
}
319
320
impl From<f64> for Val {
321
fn from(value: f64) -> Self {
322
FRegVal::new_f64(value).into()
323
}
324
}
325
326
impl From<f32> for Val {
327
fn from(value: f32) -> Self {
328
FRegVal::new_f32(value).into()
329
}
330
}
331
332
#[cfg(not(pulley_disable_interp_simd))]
333
impl From<VRegVal> for Val {
334
fn from(value: VRegVal) -> Self {
335
Val::VReg(value)
336
}
337
}
338
339
/// An `x` register value: integers.
340
#[derive(Copy, Clone)]
341
pub struct XRegVal(XRegUnion);
342
343
impl PartialEq for XRegVal {
344
fn eq(&self, other: &Self) -> bool {
345
self.get_u64() == other.get_u64()
346
}
347
}
348
349
impl Eq for XRegVal {}
350
351
impl fmt::Debug for XRegVal {
352
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
353
f.debug_struct("XRegVal")
354
.field("as_u64", &self.get_u64())
355
.finish()
356
}
357
}
358
359
impl fmt::LowerHex for XRegVal {
360
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
361
fmt::LowerHex::fmt(&self.get_u64(), f)
362
}
363
}
364
365
/// Contents of an "x" register, or a general-purpose register.
366
///
367
/// This is represented as a Rust `union` to make it easier to access typed
368
/// views of this, notably the `ptr` field which enables preserving a bit of
369
/// provenance for Rust for values stored as a pointer and read as a pointer.
370
///
371
/// Note that the actual in-memory representation of this value is handled
372
/// carefully at this time. Pulley bytecode exposes the ability to store a
373
/// 32-bit result into a register and then read the 64-bit contents of the
374
/// register. This leaves us with the question of what to do with the upper bits
375
/// of the register when the 32-bit result is generated. Possibilities for
376
/// handling this are:
377
///
378
/// 1. Do nothing, just store the 32-bit value. The problem with this approach
380
/// is that the "upper bits" are now endianness-dependent, which means the
381
/// state of the register is now platform-dependent.
381
/// 2. Sign or zero-extend. This restores platform-independent behavior but
382
/// requires an extra store on 32-bit platforms because they can probably
383
/// only store 32 bits at a time.
384
/// 3. Always store the values in this union as little-endian. This means that
385
/// big-endian platforms have to do a byte-swap but otherwise it has
386
/// platform-independent behavior.
387
///
388
/// This union chooses route (3) at this time where the values here are always
389
/// stored in little-endian form (even the `ptr` field). That guarantees
390
/// cross-platform behavior while also minimizing the amount of data stored on
391
/// writes.
392
///
393
/// In the future we may wish to benchmark this and possibly change this.
394
/// Technically Cranelift-generated bytecode should never rely on the upper bits
395
/// of a register if it didn't previously write them so this in theory doesn't
396
/// actually matter for Cranelift or wasm semantics. The only cost right now is
397
/// to big-endian platforms though and it's not certain how crucial performance
398
/// will be there.
399
///
400
/// One final note is that this notably contrasts with native CPUs where
401
/// native ISAs like RISC-V specifically define the entire register on every
402
/// instruction, even if only the low half contains a significant result. Pulley
403
/// is unlikely to become out-of-order within the CPU itself as it's interpreted,
404
/// meaning that severing data-dependencies with previous operations is
405
/// hypothesized to not be too important. If this is ever a problem though it
406
/// could increase the likelihood we go for route (2) above instead (or maybe
407
/// even (1)).
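///
/// For example, with route (3) a 32-bit store followed by a 64-bit read is
/// deterministic and identical on little- and big-endian hosts (an
/// illustrative sketch, not part of the original source):
///
/// ```ignore
/// let mut val = XRegVal::default();
/// val.set_u32(0xdead_beef);
/// // The upper four bytes were zeroed by `default()` and are left untouched
/// // by the 32-bit store, so the 64-bit read is the same on every host.
/// assert_eq!(val.get_u64(), 0x0000_0000_dead_beef);
/// ```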
408
#[derive(Copy, Clone)]
409
union XRegUnion {
410
i32: i32,
411
u32: u32,
412
i64: i64,
413
u64: u64,
414
415
// Note that this is intentionally `usize` and not an actual pointer like
416
// `*mut u8`. The reason for this is that provenance is required in Rust for
417
// pointers but Cranelift has no pointer type and thus no concept of
418
// provenance. That means that at rest it's not known whether the value has
419
// provenance or not, which basically means that Pulley is required to use
420
// "permissive provenance" in Rust as opposed to strict provenance.
421
//
422
// That's more-or-less a long-winded way of saying that storage of a pointer
423
// in this value is done with `.expose_provenance()` and reading a pointer
424
// uses `with_exposed_provenance_mut(..)`.
425
ptr: usize,
426
}
427
428
impl Default for XRegVal {
429
fn default() -> Self {
430
Self(unsafe { mem::zeroed() })
431
}
432
}
433
434
#[expect(missing_docs, reason = "self-describing methods")]
435
impl XRegVal {
436
pub fn new_i32(x: i32) -> Self {
437
let mut val = XRegVal::default();
438
val.set_i32(x);
439
val
440
}
441
442
pub fn new_u32(x: u32) -> Self {
443
let mut val = XRegVal::default();
444
val.set_u32(x);
445
val
446
}
447
448
pub fn new_i64(x: i64) -> Self {
449
let mut val = XRegVal::default();
450
val.set_i64(x);
451
val
452
}
453
454
pub fn new_u64(x: u64) -> Self {
455
let mut val = XRegVal::default();
456
val.set_u64(x);
457
val
458
}
459
460
pub fn new_ptr<T>(ptr: *mut T) -> Self {
461
let mut val = XRegVal::default();
462
val.set_ptr(ptr);
463
val
464
}
465
466
pub fn get_i32(&self) -> i32 {
467
let x = unsafe { self.0.i32 };
468
i32::from_le(x)
469
}
470
471
pub fn get_u32(&self) -> u32 {
472
let x = unsafe { self.0.u32 };
473
u32::from_le(x)
474
}
475
476
pub fn get_i64(&self) -> i64 {
477
let x = unsafe { self.0.i64 };
478
i64::from_le(x)
479
}
480
481
pub fn get_u64(&self) -> u64 {
482
let x = unsafe { self.0.u64 };
483
u64::from_le(x)
484
}
485
486
pub fn get_ptr<T>(&self) -> *mut T {
487
let ptr = unsafe { self.0.ptr };
488
core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
489
}
490
491
pub fn set_i32(&mut self, x: i32) {
492
self.0.i32 = x.to_le();
493
}
494
495
pub fn set_u32(&mut self, x: u32) {
496
self.0.u32 = x.to_le();
497
}
498
499
pub fn set_i64(&mut self, x: i64) {
500
self.0.i64 = x.to_le();
501
}
502
503
pub fn set_u64(&mut self, x: u64) {
504
self.0.u64 = x.to_le();
505
}
506
507
pub fn set_ptr<T>(&mut self, ptr: *mut T) {
508
self.0.ptr = ptr.expose_provenance().to_le();
509
}
510
}
511
512
/// An `f` register value: floats.
513
#[derive(Copy, Clone)]
514
pub struct FRegVal(FRegUnion);
515
516
impl fmt::Debug for FRegVal {
517
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
518
f.debug_struct("FRegVal")
519
.field("as_f32", &self.get_f32())
520
.field("as_f64", &self.get_f64())
521
.finish()
522
}
523
}
524
525
impl fmt::LowerHex for FRegVal {
526
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
527
fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
528
}
529
}
530
531
// NB: like `XRegUnion` values here are always little-endian, see the
532
// documentation above for more details.
533
#[derive(Copy, Clone)]
534
union FRegUnion {
535
f32: u32,
536
f64: u64,
537
}
538
539
impl Default for FRegVal {
540
fn default() -> Self {
541
Self(unsafe { mem::zeroed() })
542
}
543
}
544
545
#[expect(missing_docs, reason = "self-describing methods")]
546
impl FRegVal {
547
pub fn new_f32(f: f32) -> Self {
548
let mut val = Self::default();
549
val.set_f32(f);
550
val
551
}
552
553
pub fn new_f64(f: f64) -> Self {
554
let mut val = Self::default();
555
val.set_f64(f);
556
val
557
}
558
559
pub fn get_f32(&self) -> f32 {
560
let val = unsafe { self.0.f32 };
561
f32::from_le_bytes(val.to_ne_bytes())
562
}
563
564
pub fn get_f64(&self) -> f64 {
565
let val = unsafe { self.0.f64 };
566
f64::from_le_bytes(val.to_ne_bytes())
567
}
568
569
pub fn set_f32(&mut self, val: f32) {
570
self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
571
}
572
573
pub fn set_f64(&mut self, val: f64) {
574
self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
575
}
576
}
577
578
/// A `v` register value: vectors.
579
#[derive(Copy, Clone)]
580
#[cfg(not(pulley_disable_interp_simd))]
581
pub struct VRegVal(VRegUnion);
582
583
#[cfg(not(pulley_disable_interp_simd))]
584
impl fmt::Debug for VRegVal {
585
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
586
f.debug_struct("VRegVal")
587
.field("as_u128", &unsafe { self.0.u128 })
588
.finish()
589
}
590
}
591
592
#[cfg(not(pulley_disable_interp_simd))]
593
impl fmt::LowerHex for VRegVal {
594
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
595
fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
596
}
597
}
598
599
/// 128-bit vector registers.
600
///
601
/// This register is always stored in little-endian order and has different
602
/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
603
/// union are the same width so all bits are always defined. Note that
604
/// little-endian is required though so that bitcasts between different shapes
605
/// of vectors work. This union cannot be stored in big-endian form.
606
#[derive(Copy, Clone)]
607
#[repr(align(16))]
608
#[cfg(not(pulley_disable_interp_simd))]
609
union VRegUnion {
610
u128: u128,
611
i8x16: [i8; 16],
612
i16x8: [i16; 8],
613
i32x4: [i32; 4],
614
i64x2: [i64; 2],
615
u8x16: [u8; 16],
616
u16x8: [u16; 8],
617
u32x4: [u32; 4],
618
u64x2: [u64; 2],
619
// Note that these are `u32` and `u64`, not f32/f64. That's only because
620
// f32/f64 don't have `.to_le()` and `::from_le()` so need to go through the
621
// bits anyway.
622
f32x4: [u32; 4],
623
f64x2: [u64; 2],
624
}
625
626
#[cfg(not(pulley_disable_interp_simd))]
627
impl Default for VRegVal {
628
fn default() -> Self {
629
Self(unsafe { mem::zeroed() })
630
}
631
}
632
633
#[expect(missing_docs, reason = "self-describing methods")]
634
#[cfg(not(pulley_disable_interp_simd))]
635
impl VRegVal {
636
pub fn new_u128(i: u128) -> Self {
637
let mut val = Self::default();
638
val.set_u128(i);
639
val
640
}
641
642
pub fn get_u128(&self) -> u128 {
643
let val = unsafe { self.0.u128 };
644
u128::from_le(val)
645
}
646
647
pub fn set_u128(&mut self, val: u128) {
648
self.0.u128 = val.to_le();
649
}
650
651
fn get_i8x16(&self) -> [i8; 16] {
652
let val = unsafe { self.0.i8x16 };
653
val.map(|e| i8::from_le(e))
654
}
655
656
fn set_i8x16(&mut self, val: [i8; 16]) {
657
self.0.i8x16 = val.map(|e| e.to_le());
658
}
659
660
fn get_u8x16(&self) -> [u8; 16] {
661
let val = unsafe { self.0.u8x16 };
662
val.map(|e| u8::from_le(e))
663
}
664
665
fn set_u8x16(&mut self, val: [u8; 16]) {
666
self.0.u8x16 = val.map(|e| e.to_le());
667
}
668
669
fn get_i16x8(&self) -> [i16; 8] {
670
let val = unsafe { self.0.i16x8 };
671
val.map(|e| i16::from_le(e))
672
}
673
674
fn set_i16x8(&mut self, val: [i16; 8]) {
675
self.0.i16x8 = val.map(|e| e.to_le());
676
}
677
678
fn get_u16x8(&self) -> [u16; 8] {
679
let val = unsafe { self.0.u16x8 };
680
val.map(|e| u16::from_le(e))
681
}
682
683
fn set_u16x8(&mut self, val: [u16; 8]) {
684
self.0.u16x8 = val.map(|e| e.to_le());
685
}
686
687
fn get_i32x4(&self) -> [i32; 4] {
688
let val = unsafe { self.0.i32x4 };
689
val.map(|e| i32::from_le(e))
690
}
691
692
fn set_i32x4(&mut self, val: [i32; 4]) {
693
self.0.i32x4 = val.map(|e| e.to_le());
694
}
695
696
fn get_u32x4(&self) -> [u32; 4] {
697
let val = unsafe { self.0.u32x4 };
698
val.map(|e| u32::from_le(e))
699
}
700
701
fn set_u32x4(&mut self, val: [u32; 4]) {
702
self.0.u32x4 = val.map(|e| e.to_le());
703
}
704
705
fn get_i64x2(&self) -> [i64; 2] {
706
let val = unsafe { self.0.i64x2 };
707
val.map(|e| i64::from_le(e))
708
}
709
710
fn set_i64x2(&mut self, val: [i64; 2]) {
711
self.0.i64x2 = val.map(|e| e.to_le());
712
}
713
714
fn get_u64x2(&self) -> [u64; 2] {
715
let val = unsafe { self.0.u64x2 };
716
val.map(|e| u64::from_le(e))
717
}
718
719
fn set_u64x2(&mut self, val: [u64; 2]) {
720
self.0.u64x2 = val.map(|e| e.to_le());
721
}
722
723
fn get_f64x2(&self) -> [f64; 2] {
724
let val = unsafe { self.0.f64x2 };
725
val.map(|e| f64::from_bits(u64::from_le(e)))
726
}
727
728
fn set_f64x2(&mut self, val: [f64; 2]) {
729
self.0.f64x2 = val.map(|e| e.to_bits().to_le());
730
}
731
732
fn get_f32x4(&self) -> [f32; 4] {
733
let val = unsafe { self.0.f32x4 };
734
val.map(|e| f32::from_bits(u32::from_le(e)))
735
}
736
737
fn set_f32x4(&mut self, val: [f32; 4]) {
738
self.0.f32x4 = val.map(|e| e.to_bits().to_le());
739
}
740
}
741
742
/// The machine state for a Pulley virtual machine: the various registers and
743
/// stack.
744
pub struct MachineState {
745
x_regs: [XRegVal; XReg::RANGE.end as usize],
746
f_regs: [FRegVal; FReg::RANGE.end as usize],
747
#[cfg(not(pulley_disable_interp_simd))]
748
v_regs: [VRegVal; VReg::RANGE.end as usize],
749
fp: *mut u8,
750
lr: *mut u8,
751
stack: Stack,
752
done_reason: Option<DoneReason<()>>,
753
}
754
755
unsafe impl Send for MachineState {}
756
unsafe impl Sync for MachineState {}
757
758
/// Helper structure to store the state of the Pulley stack.
759
///
760
/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
761
/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
762
/// done with a custom `Vec<T>` internally where `T` has size and align of 16.
763
/// This is manually done with a helper `Align16` type below.
764
struct Stack {
765
storage: Vec<Align16>,
766
}
767
768
/// Helper type used with `Stack` above.
769
#[derive(Copy, Clone)]
770
#[repr(align(16))]
771
struct Align16 {
772
// Just here to give the structure a size of 16. The alignment is always 16
773
// regardless of what the host platform's alignment of u128 is.
774
_unused: u128,
775
}
776
777
impl Stack {
778
/// Creates a new stack which will have a byte size of at least `size`.
779
///
780
/// The allocated stack might be slightly larger due to the rounding necessary
/// to reach a multiple of 16; for example, a requested size of 100 bytes
/// results in a 112-byte allocation.
781
fn new(size: usize) -> Stack {
782
Stack {
783
// Round up `size` to the nearest multiple of 16. Note that the
784
// stack is also allocated here but not initialized, and that's
785
// intentional as pulley bytecode should always initialize the stack
786
// before use.
787
storage: Vec::with_capacity((size + 15) / 16),
788
}
789
}
790
791
/// Returns a pointer to the top of the stack (the highest address).
792
///
793
/// Note that the returned pointer has provenance for the entire stack
794
/// allocation, not just the top.
795
fn top(&mut self) -> *mut u8 {
796
let len = self.len();
797
unsafe { self.base().add(len) }
798
}
799
800
/// Returns a pointer to the base of the stack (the lowest address).
801
///
802
/// Note that the returned pointer has provenance for the entire stack
803
/// allocation, not just the base.
804
fn base(&mut self) -> *mut u8 {
805
self.storage.as_mut_ptr().cast::<u8>()
806
}
807
808
/// Returns the length, in bytes, of this stack allocation.
809
fn len(&self) -> usize {
810
self.storage.capacity() * mem::size_of::<Align16>()
811
}
812
}
813
814
impl fmt::Debug for MachineState {
815
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
816
let MachineState {
817
x_regs,
818
f_regs,
819
#[cfg(not(pulley_disable_interp_simd))]
820
v_regs,
821
stack: _,
822
done_reason: _,
823
fp: _,
824
lr: _,
825
} = self;
826
827
struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);
828
829
impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
830
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
831
let mut f = f.debug_map();
832
for (i, r) in self.0.iter().enumerate() {
833
f.entry(&(self.1)(i as u8), r);
834
}
835
f.finish()
836
}
837
}
838
839
let mut f = f.debug_struct("MachineState");
840
841
f.field(
842
"x_regs",
843
&RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
844
)
845
.field(
846
"f_regs",
847
&RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
848
);
849
#[cfg(not(pulley_disable_interp_simd))]
850
f.field(
851
"v_regs",
852
&RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
853
);
854
f.finish_non_exhaustive()
855
}
856
}
857
858
macro_rules! index_reg {
859
($reg_ty:ty,$value_ty:ty,$field:ident) => {
860
impl Index<$reg_ty> for Vm {
861
type Output = $value_ty;
862
863
fn index(&self, reg: $reg_ty) -> &Self::Output {
864
&self.state[reg]
865
}
866
}
867
868
impl IndexMut<$reg_ty> for Vm {
869
fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
870
&mut self.state[reg]
871
}
872
}
873
874
impl Index<$reg_ty> for MachineState {
875
type Output = $value_ty;
876
877
fn index(&self, reg: $reg_ty) -> &Self::Output {
878
&self.$field[reg.index()]
879
}
880
}
881
882
impl IndexMut<$reg_ty> for MachineState {
883
fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
884
&mut self.$field[reg.index()]
885
}
886
}
887
};
888
}
889
890
index_reg!(XReg, XRegVal, x_regs);
891
index_reg!(FReg, FRegVal, f_regs);
892
#[cfg(not(pulley_disable_interp_simd))]
893
index_reg!(VReg, VRegVal, v_regs);
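// These impls let register accesses be written as plain indexing, e.g. (an
// illustrative sketch):
//
//     let mut vm = Vm::new();
//     vm[XReg::x0] = XRegVal::new_u64(42);
//     assert_eq!(vm[XReg::x0].get_u64(), 42);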
894
895
/// Sentinel return address that signals the end of the call stack.
896
const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;
897
898
impl MachineState {
899
fn with_stack(stack_size: usize) -> Self {
900
let mut state = Self {
901
x_regs: [Default::default(); XReg::RANGE.end as usize],
902
f_regs: Default::default(),
903
#[cfg(not(pulley_disable_interp_simd))]
904
v_regs: Default::default(),
905
stack: Stack::new(stack_size),
906
done_reason: None,
907
fp: HOST_RETURN_ADDR,
908
lr: HOST_RETURN_ADDR,
909
};
910
911
let sp = state.stack.top();
912
state[XReg::sp] = XRegVal::new_ptr(sp);
913
914
state
915
}
916
}
917
918
/// Inner private module to prevent creation of the `Done` structure outside of
919
/// this module.
920
mod done {
921
use super::{Encode, Interpreter, MachineState};
922
use core::ops::ControlFlow;
923
use core::ptr::NonNull;
924
925
/// Zero-sized sentinel indicating that pulley execution has halted.
926
///
927
/// The reason for halting is stored in `MachineState`.
928
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
929
pub struct Done {
930
_priv: (),
931
}
932
933
/// Reason that the pulley interpreter has ceased execution.
934
pub enum DoneReason<T> {
935
/// A trap happened at this bytecode instruction.
936
Trap {
937
/// Which instruction is raising this trap.
938
pc: NonNull<u8>,
939
/// The kind of trap being raised, if known.
940
kind: Option<TrapKind>,
941
},
942
/// The `call_indirect_host` instruction was executed.
943
CallIndirectHost {
944
/// The payload of `call_indirect_host`.
945
id: u8,
946
/// Where to resume execution after the host has finished.
947
resume: NonNull<u8>,
948
},
949
/// Pulley has finished and the provided value is being returned.
950
ReturnToHost(T),
951
}
952
953
/// Stored within `DoneReason::Trap`.
954
#[expect(missing_docs, reason = "self-describing variants")]
955
pub enum TrapKind {
956
DivideByZero,
957
IntegerOverflow,
958
BadConversionToInteger,
959
MemoryOutOfBounds,
960
DisabledOpcode,
961
StackOverflow,
962
}
963
964
impl MachineState {
965
pub(super) fn debug_assert_done_reason_none(&mut self) {
966
debug_assert!(self.done_reason.is_none());
967
}
968
969
pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
970
self.done_reason.take().unwrap()
971
}
972
}
973
974
impl Interpreter<'_> {
975
/// Finishes execution by recording `DoneReason::Trap`.
976
///
977
/// This method takes an `I` generic parameter indicating which
978
/// instruction is executing this function and generating a trap. That's
979
/// used to go backwards from the current `pc`, which is just beyond the
980
/// instruction, so that the trap metadata returned from the interpreter
981
/// points to the instruction itself.
982
#[cold]
983
pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
984
self.done_trap_kind::<I>(None)
985
}
986
987
/// Same as `done_trap` but with an explicit `TrapKind`.
988
#[cold]
989
pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
990
let pc = self.current_pc::<I>();
991
self.state.done_reason = Some(DoneReason::Trap { pc, kind });
992
ControlFlow::Break(Done { _priv: () })
993
}
994
995
/// Finishes execution by recording `DoneReason::CallIndirectHost`.
996
#[cold]
997
pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
998
self.state.done_reason = Some(DoneReason::CallIndirectHost {
999
id,
1000
resume: self.pc.as_ptr(),
1001
});
1002
ControlFlow::Break(Done { _priv: () })
1003
}
1004
1005
/// Finishes execution by recording `DoneReason::ReturnToHost`.
1006
#[cold]
1007
pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
1008
self.state.done_reason = Some(DoneReason::ReturnToHost(()));
1009
ControlFlow::Break(Done { _priv: () })
1010
}
1011
}
1012
}
1013
1014
use done::Done;
1015
pub use done::{DoneReason, TrapKind};
1016
1017
struct Interpreter<'a> {
1018
state: &'a mut MachineState,
1019
pc: UnsafeBytecodeStream,
1020
executing_pc: ExecutingPcRef<'a>,
1021
}
1022
1023
impl Interpreter<'_> {
1024
/// Calculates the address that is `offset` bytes away from the current
/// instruction `I`.
1025
#[inline]
1026
fn pc_rel<I: Encode>(&mut self, offset: PcRelOffset) -> NonNull<u8> {
1027
let offset = isize::try_from(i32::from(offset)).unwrap();
1028
unsafe { self.current_pc::<I>().offset(offset) }
1029
}
1030
1031
/// Performs a relative jump of `offset` bytes from the current instruction.
1032
///
1033
/// This will jump from the start of the current instruction, identified by
1034
/// `I`, `offset` bytes away. Note that the `self.pc` at the start of this
1035
/// function actually points to the instruction after this one, so `I` is
1036
/// needed to go back to the start of the current instruction, from which we
/// then go `offset` away.
1037
#[inline]
1038
fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1039
let new_pc = self.pc_rel::<I>(offset);
1040
self.pc = unsafe { UnsafeBytecodeStream::new(new_pc) };
1041
ControlFlow::Continue(())
1042
}
1043
1044
/// Returns the PC of the current instruction where `I` is the static type
1045
/// representing the current instruction.
1046
fn current_pc<I: Encode>(&self) -> NonNull<u8> {
1047
unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
1048
}
1049
1050
/// `sp -= size_of::<T>(); *sp = val;`
1051
///
1052
/// Note that `I` is the instruction which is pushing data to use if a trap
1053
/// is generated.
1054
#[must_use]
1055
fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
1056
let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
1057
self.set_sp::<I>(new_sp.cast())?;
1058
unsafe {
1059
new_sp.write_unaligned(val);
1060
}
1061
ControlFlow::Continue(())
1062
}
1063
1064
/// `ret = *sp; sp += size_of::<T>()`
1065
fn pop<T>(&mut self) -> T {
1066
let sp = self.state[XReg::sp].get_ptr::<T>();
1067
let val = unsafe { sp.read_unaligned() };
1068
self.set_sp_unchecked(sp.wrapping_add(1));
1069
val
1070
}
1071
1072
/// Sets the stack pointer to the `sp` provided.
1073
///
1074
/// Returns a trap if this would result in stack overflow, or if `sp` is
1075
/// beneath the base pointer of `self.state.stack`.
1076
///
1077
/// The `I` parameter here is the instruction that is setting the stack
1078
/// pointer and is used to calculate this instruction's own `pc` if this
1079
/// instruction traps.
1080
#[must_use]
1081
fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
1082
let sp_raw = sp as usize;
1083
let base_raw = self.state.stack.base() as usize;
1084
if sp_raw < base_raw {
1085
return self.done_trap_kind::<I>(Some(TrapKind::StackOverflow));
1086
}
1087
self.set_sp_unchecked(sp);
1088
ControlFlow::Continue(())
1089
}
1090
1091
/// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
1092
/// only be used with stack increment operations such as `pop`.
1093
fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
1094
if cfg!(debug_assertions) {
1095
let sp_raw = sp as usize;
1096
let base = self.state.stack.base() as usize;
1097
let end = base + self.state.stack.len();
1098
assert!(base <= sp_raw && sp_raw <= end);
1099
}
1100
self.state[XReg::sp].set_ptr(sp);
1101
}
1102
1103
/// Loads a value of `T` using native-endian byte ordering from the `addr`
1104
/// specified.
1105
///
1106
/// The `I` type parameter is the instruction issuing this load which is
1107
/// used in case of traps to calculate the trapping pc.
1108
///
1109
/// Returns `ControlFlow::Break` if a trap happens or
1110
/// `ControlFlow::Continue` if the value was loaded successfully.
1111
///
1112
/// # Unsafety
1113
///
1114
/// Safety of this method relies on the safety of the original bytecode
1115
/// itself and correctly annotating both `T` and `I`.
1116
#[must_use]
1117
unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
1118
unsafe { addr.load_ne::<T, I>(self) }
1119
}
1120
1121
/// Stores a `val` to the `addr` specified.
1122
///
1123
/// The `I` type parameter is the instruction issuing this store which is
1124
/// used in case of traps to calculate the trapping pc.
1125
///
1126
/// Returns `ControlFlow::Break` if a trap happens or
1127
/// `ControlFlow::Continue` if the value was stored successfully.
1128
///
1129
/// # Unsafety
1130
///
1131
/// Safety of this method relies on the safety of the original bytecode
1132
/// itself and correctly annotating both `T` and `I`.
1133
#[must_use]
1134
unsafe fn store_ne<T, I: Encode>(
1135
&mut self,
1136
addr: impl AddressingMode,
1137
val: T,
1138
) -> ControlFlow<Done> {
1139
unsafe { addr.store_ne::<T, I>(self, val) }
1140
}
1141
1142
fn check_xnn_from_f32<I: Encode>(
1143
&mut self,
1144
val: f32,
1145
(lo, hi): (f32, f32),
1146
) -> ControlFlow<Done> {
1147
self.check_xnn_from_f64::<I>(val.into(), (lo.into(), hi.into()))
1148
}
1149
1150
fn check_xnn_from_f64<I: Encode>(
1151
&mut self,
1152
val: f64,
1153
(lo, hi): (f64, f64),
1154
) -> ControlFlow<Done> {
1155
if val != val {
1156
return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
1157
}
1158
let val = val.wasm_trunc();
1159
if val <= lo || val >= hi {
1160
return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
1161
}
1162
ControlFlow::Continue(())
1163
}
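// As an illustration of the checks above: for a signed 32-bit destination the
// bounds passed in would be `(lo, hi) = (-2147483649.0, 2147483648.0)` (an
// assumption here, matching wasm's trapping conversion rules). A NaN input
// traps with `BadConversionToInteger`; otherwise the value is truncated and
// traps with `IntegerOverflow` unless it lies strictly between `lo` and `hi`,
// i.e. unless the truncated value fits in `-2147483648..=2147483647`.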
1164
1165
#[cfg(not(pulley_disable_interp_simd))]
1166
fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
1167
let lo = self.state[lo].get_u64();
1168
let hi = self.state[hi].get_i64();
1169
i128::from(lo) | (i128::from(hi) << 64)
1170
}
1171
1172
#[cfg(not(pulley_disable_interp_simd))]
1173
fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
1174
self.state[lo].set_u64(val as u64);
1175
self.state[hi].set_u64((val >> 64) as u64);
1176
}
1177
1178
fn record_executing_pc_for_profiling(&mut self) {
1179
// Note that this is a no-op if `feature = "profile"` is disabled.
1180
self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
1181
}
1182
}
1183
1184
/// Helper trait to encompass the various addressing modes of Pulley.
1185
trait AddressingMode: Sized {
1186
/// Calculates the native host address `*mut T` corresponding to this
1187
/// addressing mode.
1188
///
1189
/// # Safety
1190
///
1191
/// Relies on the original bytecode being safe to execute as this will
1192
/// otherwise perform unsafe byte offsets for example which requires the
1193
/// original bytecode to be correct.
1194
#[must_use]
1195
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;
1196
1197
/// Loads a value of `T` from this address, using native-endian byte order.
1198
///
1199
/// For more information see [`Interpreter::load_ne`].
1200
#[must_use]
1201
unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
1202
let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
1203
ControlFlow::Continue(ret)
1204
}
1205
1206
/// Stores a `val` to this address, using native-endian byte order.
1207
///
1208
/// For more information see [`Interpreter::store_ne`].
1209
#[must_use]
1210
unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
1211
unsafe {
1212
self.addr::<T, I>(i)?.write_unaligned(val);
1213
}
1214
ControlFlow::Continue(())
1215
}
1216
}
1217
1218
impl AddressingMode for AddrO32 {
1219
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1220
// Note that this addressing mode cannot return `ControlFlow::Break`
1221
// which is intentional. It's expected that LLVM optimizes away any
1222
// branches callers have.
1223
unsafe {
1224
ControlFlow::Continue(
1225
i.state[self.addr]
1226
.get_ptr::<T>()
1227
.byte_offset(self.offset as isize),
1228
)
1229
}
1230
}
1231
}
1232
1233
impl AddressingMode for AddrZ {
1234
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1235
// This addressing mode defines loading/storing to the null address as
1236
// a trap, but all other addresses are allowed.
1237
let host_addr = i.state[self.addr].get_ptr::<T>();
1238
if host_addr.is_null() {
1239
i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1240
unreachable!();
1241
}
1242
unsafe {
1243
let addr = host_addr.byte_offset(self.offset as isize);
1244
ControlFlow::Continue(addr)
1245
}
1246
}
1247
}
1248
1249
impl AddressingMode for AddrG32 {
1250
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1251
// Test if `bound - offset - size_of::<T>()` is less than the wasm address to
1252
// generate a trap. It's a guarantee of this instruction that these
1253
// subtractions don't overflow.
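// For example, with `bound = 0x1_0000`, `offset = 4`, and a 4-byte `T`, any
// `wasm_addr > 0xfff8` traps, so the furthest access allowed ends exactly at
// `0xfff8 + 4 + 4 = 0x1_0000`, the bound itself.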
1254
let bound = i.state[self.host_heap_bound].get_u64() as usize;
1255
let offset = usize::from(self.offset);
1256
let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1257
if wasm_addr > bound - offset - size_of::<T>() {
1258
i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1259
unreachable!();
1260
}
1261
unsafe {
1262
let addr = i.state[self.host_heap_base]
1263
.get_ptr::<T>()
1264
.byte_add(wasm_addr)
1265
.byte_add(offset);
1266
ControlFlow::Continue(addr)
1267
}
1268
}
1269
}
1270
1271
impl AddressingMode for AddrG32Bne {
1272
unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1273
// Same as `AddrG32` above except that the bound is loaded from memory.
1274
let bound = unsafe {
1275
*i.state[self.host_heap_bound_addr]
1276
.get_ptr::<usize>()
1277
.byte_add(usize::from(self.host_heap_bound_offset))
1278
};
1279
let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1280
let offset = usize::from(self.offset);
1281
if wasm_addr > bound - offset - size_of::<T>() {
1282
i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1283
unreachable!();
1284
}
1285
unsafe {
1286
let addr = i.state[self.host_heap_base]
1287
.get_ptr::<T>()
1288
.byte_add(wasm_addr)
1289
.byte_add(offset);
1290
ControlFlow::Continue(addr)
1291
}
1292
}
1293
}
1294
1295
#[test]
1296
fn simple_push_pop() {
1297
let mut state = MachineState::with_stack(16);
1298
let pc = ExecutingPc::default();
1299
unsafe {
1300
let mut bytecode = [0; 10];
1301
let mut i = Interpreter {
1302
state: &mut state,
1303
// this isn't actually read so just manufacture a dummy one
1304
pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
1305
executing_pc: pc.as_ref(),
1306
};
1307
assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
1308
assert_eq!(i.pop::<i32>(), 0_i32);
1309
assert!(i.push::<crate::Ret, _>(1_i32).is_continue());
1310
assert!(i.push::<crate::Ret, _>(2_i32).is_continue());
1311
assert!(i.push::<crate::Ret, _>(3_i32).is_continue());
1312
assert!(i.push::<crate::Ret, _>(4_i32).is_continue());
1313
assert!(i.push::<crate::Ret, _>(5_i32).is_break());
1314
assert!(i.push::<crate::Ret, _>(6_i32).is_break());
1315
assert_eq!(i.pop::<i32>(), 4_i32);
1316
assert_eq!(i.pop::<i32>(), 3_i32);
1317
assert_eq!(i.pop::<i32>(), 2_i32);
1318
assert_eq!(i.pop::<i32>(), 1_i32);
1319
}
1320
}
1321
1322
macro_rules! br_if_imm {
1323
($(
1324
fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
1325
= $camel:ident / $op:tt / $get:ident;
1326
)*) => {$(
1327
fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
1328
let a = self.state[a].$get();
1329
if a $op b.into() {
1330
self.pc_rel_jump::<crate::$camel>(offset)
1331
} else {
1332
ControlFlow::Continue(())
1333
}
1334
}
1335
)*};
1336
}
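// For reference, an entry such as `br_if_xeq32_i8` in the `OpVisitor` impl
// below expands to roughly the following (illustrative sketch of the macro
// output):
//
//     fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset) -> ControlFlow<Done> {
//         let a = self.state[a].get_i32();
//         if a == b.into() {
//             self.pc_rel_jump::<crate::BrIfXeq32I8>(offset)
//         } else {
//             ControlFlow::Continue(())
//         }
//     }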
1337
1338
impl OpVisitor for Interpreter<'_> {
1339
type BytecodeStream = UnsafeBytecodeStream;
1340
type Return = ControlFlow<Done>;
1341
1342
fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
1343
&mut self.pc
1344
}
1345
1346
fn ret(&mut self) -> ControlFlow<Done> {
1347
let lr = self.state.lr;
1348
if lr == HOST_RETURN_ADDR {
1349
self.done_return_to_host()
1350
} else {
1351
self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
1352
ControlFlow::Continue(())
1353
}
1354
}
1355
1356
fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1357
let return_addr = self.pc.as_ptr();
1358
self.state.lr = return_addr.as_ptr();
1359
self.pc_rel_jump::<crate::Call>(offset)
1360
}
1361
1362
fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1363
let return_addr = self.pc.as_ptr();
1364
self.state.lr = return_addr.as_ptr();
1365
self.state[XReg::x0] = self.state[arg1];
1366
self.pc_rel_jump::<crate::Call1>(offset)
1367
}
1368
1369
fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1370
let return_addr = self.pc.as_ptr();
1371
self.state.lr = return_addr.as_ptr();
1372
let (x0, x1) = (self.state[arg1], self.state[arg2]);
1373
self.state[XReg::x0] = x0;
1374
self.state[XReg::x1] = x1;
1375
self.pc_rel_jump::<crate::Call2>(offset)
1376
}
1377
1378
fn call3(
1379
&mut self,
1380
arg1: XReg,
1381
arg2: XReg,
1382
arg3: XReg,
1383
offset: PcRelOffset,
1384
) -> ControlFlow<Done> {
1385
let return_addr = self.pc.as_ptr();
1386
self.state.lr = return_addr.as_ptr();
1387
let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
1388
self.state[XReg::x0] = x0;
1389
self.state[XReg::x1] = x1;
1390
self.state[XReg::x2] = x2;
1391
self.pc_rel_jump::<crate::Call3>(offset)
1392
}
1393
1394
fn call4(
1395
&mut self,
1396
arg1: XReg,
1397
arg2: XReg,
1398
arg3: XReg,
1399
arg4: XReg,
1400
offset: PcRelOffset,
1401
) -> ControlFlow<Done> {
1402
let return_addr = self.pc.as_ptr();
1403
self.state.lr = return_addr.as_ptr();
1404
let (x0, x1, x2, x3) = (
1405
self.state[arg1],
1406
self.state[arg2],
1407
self.state[arg3],
1408
self.state[arg4],
1409
);
1410
self.state[XReg::x0] = x0;
1411
self.state[XReg::x1] = x1;
1412
self.state[XReg::x2] = x2;
1413
self.state[XReg::x3] = x3;
1414
self.pc_rel_jump::<crate::Call4>(offset)
1415
}
1416
1417
fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
1418
let return_addr = self.pc.as_ptr();
1419
self.state.lr = return_addr.as_ptr();
1420
// SAFETY: part of the unsafe contract of the interpreter is that only valid
1421
// bytecode is interpreted, so the jump destination is part of the validity
1422
// of the bytecode itself.
1423
unsafe {
1424
self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
1425
}
1426
ControlFlow::Continue(())
1427
}
1428
1429
fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1430
self.pc_rel_jump::<crate::Jump>(offset)
1431
}
1432
1433
fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
1434
unsafe {
1435
self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
1436
}
1437
ControlFlow::Continue(())
1438
}
1439
1440
fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1441
let cond = self.state[cond].get_u32();
1442
if cond != 0 {
1443
self.pc_rel_jump::<crate::BrIf>(offset)
1444
} else {
1445
ControlFlow::Continue(())
1446
}
1447
}
1448
1449
fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1450
let cond = self.state[cond].get_u32();
1451
if cond == 0 {
1452
self.pc_rel_jump::<crate::BrIfNot>(offset)
1453
} else {
1454
ControlFlow::Continue(())
1455
}
1456
}
1457
1458
fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1459
let a = self.state[a].get_u32();
1460
let b = self.state[b].get_u32();
1461
if a == b {
1462
self.pc_rel_jump::<crate::BrIfXeq32>(offset)
1463
} else {
1464
ControlFlow::Continue(())
1465
}
1466
}
1467
1468
fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1469
let a = self.state[a].get_u32();
1470
let b = self.state[b].get_u32();
1471
if a != b {
1472
self.pc_rel_jump::<crate::BrIfXneq32>(offset)
1473
} else {
1474
ControlFlow::Continue(())
1475
}
1476
}
1477
1478
fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1479
let a = self.state[a].get_i32();
1480
let b = self.state[b].get_i32();
1481
if a < b {
1482
self.pc_rel_jump::<crate::BrIfXslt32>(offset)
1483
} else {
1484
ControlFlow::Continue(())
1485
}
1486
}
1487
1488
fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1489
let a = self.state[a].get_i32();
1490
let b = self.state[b].get_i32();
1491
if a <= b {
1492
self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
1493
} else {
1494
ControlFlow::Continue(())
1495
}
1496
}
1497
1498
fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1499
let a = self.state[a].get_u32();
1500
let b = self.state[b].get_u32();
1501
if a < b {
1502
self.pc_rel_jump::<crate::BrIfXult32>(offset)
1503
} else {
1504
ControlFlow::Continue(())
1505
}
1506
}
1507
1508
fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1509
let a = self.state[a].get_u32();
1510
let b = self.state[b].get_u32();
1511
if a <= b {
1512
self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
1513
} else {
1514
ControlFlow::Continue(())
1515
}
1516
}
1517
1518
fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1519
let a = self.state[a].get_u64();
1520
let b = self.state[b].get_u64();
1521
if a == b {
1522
self.pc_rel_jump::<crate::BrIfXeq64>(offset)
1523
} else {
1524
ControlFlow::Continue(())
1525
}
1526
}
1527
1528
fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1529
let a = self.state[a].get_u64();
1530
let b = self.state[b].get_u64();
1531
if a != b {
1532
self.pc_rel_jump::<crate::BrIfXneq64>(offset)
1533
} else {
1534
ControlFlow::Continue(())
1535
}
1536
}
1537
1538
fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1539
let a = self.state[a].get_i64();
1540
let b = self.state[b].get_i64();
1541
if a < b {
1542
self.pc_rel_jump::<crate::BrIfXslt64>(offset)
1543
} else {
1544
ControlFlow::Continue(())
1545
}
1546
}
1547
1548
fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1549
let a = self.state[a].get_i64();
1550
let b = self.state[b].get_i64();
1551
if a <= b {
1552
self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
1553
} else {
1554
ControlFlow::Continue(())
1555
}
1556
}
1557
1558
fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1559
let a = self.state[a].get_u64();
1560
let b = self.state[b].get_u64();
1561
if a < b {
1562
self.pc_rel_jump::<crate::BrIfXult64>(offset)
1563
} else {
1564
ControlFlow::Continue(())
1565
}
1566
}
1567
1568
fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1569
let a = self.state[a].get_u64();
1570
let b = self.state[b].get_u64();
1571
if a <= b {
1572
self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
1573
} else {
1574
ControlFlow::Continue(())
1575
}
1576
}
1577
1578
br_if_imm! {
1579
fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1580
= BrIfXeq32I8 / == / get_i32;
1581
fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1582
= BrIfXeq32I32 / == / get_i32;
1583
fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1584
= BrIfXneq32I8 / != / get_i32;
1585
fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1586
= BrIfXneq32I32 / != / get_i32;
1587
1588
fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1589
= BrIfXslt32I8 / < / get_i32;
1590
fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1591
= BrIfXslt32I32 / < / get_i32;
1592
fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1593
= BrIfXsgt32I8 / > / get_i32;
1594
fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1595
= BrIfXsgt32I32 / > / get_i32;
1596
fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1597
= BrIfXslteq32I8 / <= / get_i32;
1598
fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1599
= BrIfXslteq32I32 / <= / get_i32;
1600
fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1601
= BrIfXsgteq32I8 / >= / get_i32;
1602
fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1603
= BrIfXsgteq32I32 / >= / get_i32;
1604
1605
fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1606
= BrIfXult32U8 / < / get_u32;
1607
fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1608
= BrIfXult32U32 / < / get_u32;
1609
fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1610
= BrIfXugt32U8 / > / get_u32;
1611
fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1612
= BrIfXugt32U32 / > / get_u32;
1613
fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1614
= BrIfXulteq32U8 / <= / get_u32;
1615
fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1616
= BrIfXulteq32U32 / <= / get_u32;
1617
fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1618
= BrIfXugteq32U8 / >= / get_u32;
1619
fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1620
= BrIfXugteq32U32 / >= / get_u32;
1621
1622
fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1623
= BrIfXeq64I8 / == / get_i64;
1624
fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1625
= BrIfXeq64I32 / == / get_i64;
1626
fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1627
= BrIfXneq64I8 / != / get_i64;
1628
fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1629
= BrIfXneq64I32 / != / get_i64;
1630
1631
fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1632
= BrIfXslt64I8 / < / get_i64;
1633
fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1634
= BrIfXslt64I32 / < / get_i64;
1635
fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1636
= BrIfXsgt64I8 / > / get_i64;
1637
fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1638
= BrIfXsgt64I32 / > / get_i64;
1639
fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1640
= BrIfXslteq64I8 / <= / get_i64;
1641
fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1642
= BrIfXslteq64I32 / <= / get_i64;
1643
fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1644
= BrIfXsgteq64I8 / >= / get_i64;
1645
fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1646
= BrIfXsgteq64I32 / >= / get_i64;
1647
1648
fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1649
= BrIfXult64U8 / < / get_u64;
1650
fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1651
= BrIfXult64U32 / < / get_u64;
1652
fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1653
= BrIfXugt64U8 / > / get_u64;
1654
fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1655
= BrIfXugt64U32 / > / get_u64;
1656
fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1657
= BrIfXulteq64U8 / <= / get_u64;
1658
fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1659
= BrIfXulteq64U32 / <= / get_u64;
1660
fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1661
= BrIfXugteq64U8 / >= / get_u64;
1662
fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1663
= BrIfXugteq64U32 / >= / get_u64;
1664
}
1665
1666
fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1667
let val = self.state[src];
1668
self.state[dst] = val;
1669
ControlFlow::Continue(())
1670
}
1671
1672
fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
1673
self.state[dst].set_i64(i64::from(imm));
1674
ControlFlow::Continue(())
1675
}
1676
1677
fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
1678
self.state[dst].set_i64(0);
1679
ControlFlow::Continue(())
1680
}
1681
1682
fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
1683
self.state[dst].set_i64(1);
1684
ControlFlow::Continue(())
1685
}
1686
1687
fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
1688
self.state[dst].set_i64(i64::from(imm));
1689
ControlFlow::Continue(())
1690
}
1691
1692
fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
1693
self.state[dst].set_i64(i64::from(imm));
1694
ControlFlow::Continue(())
1695
}
1696
1697
fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
1698
self.state[dst].set_i64(imm);
1699
ControlFlow::Continue(())
1700
}
1701
1702
fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1703
let a = self.state[operands.src1].get_u32();
1704
let b = self.state[operands.src2].get_u32();
1705
self.state[operands.dst].set_u32(a.wrapping_add(b));
1706
ControlFlow::Continue(())
1707
}
1708
1709
fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1710
self.xadd32_u32(dst, src1, src2.into())
1711
}
1712
1713
fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1714
let a = self.state[src1].get_u32();
1715
self.state[dst].set_u32(a.wrapping_add(src2));
1716
ControlFlow::Continue(())
1717
}
1718
1719
fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1720
let a = self.state[operands.src1].get_u64();
1721
let b = self.state[operands.src2].get_u64();
1722
self.state[operands.dst].set_u64(a.wrapping_add(b));
1723
ControlFlow::Continue(())
1724
}
1725
1726
fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1727
self.xadd64_u32(dst, src1, src2.into())
1728
}
1729
1730
fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1731
let a = self.state[src1].get_u64();
1732
self.state[dst].set_u64(a.wrapping_add(src2.into()));
1733
ControlFlow::Continue(())
1734
}
1735
1736
fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1737
let a = self.state[src1].get_u32();
1738
let b = self.state[src2].get_u32();
1739
let c = self.state[src3].get_u32();
1740
self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1741
ControlFlow::Continue(())
1742
}
1743
1744
fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1745
let a = self.state[src1].get_u64();
1746
let b = self.state[src2].get_u64();
1747
let c = self.state[src3].get_u64();
1748
self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1749
ControlFlow::Continue(())
1750
}
1751
1752
fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1753
let a = self.state[operands.src1].get_u32();
1754
let b = self.state[operands.src2].get_u32();
1755
self.state[operands.dst].set_u32(a.wrapping_sub(b));
1756
ControlFlow::Continue(())
1757
}
1758
1759
fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1760
self.xsub32_u32(dst, src1, src2.into())
1761
}
1762
1763
fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1764
let a = self.state[src1].get_u32();
1765
self.state[dst].set_u32(a.wrapping_sub(src2));
1766
ControlFlow::Continue(())
1767
}
1768
1769
fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1770
let a = self.state[operands.src1].get_u64();
1771
let b = self.state[operands.src2].get_u64();
1772
self.state[operands.dst].set_u64(a.wrapping_sub(b));
1773
ControlFlow::Continue(())
1774
}
1775
1776
fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1777
self.xsub64_u32(dst, src1, src2.into())
1778
}
1779
1780
fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1781
let a = self.state[src1].get_u64();
1782
self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1783
ControlFlow::Continue(())
1784
}
1785
1786
fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1787
let a = self.state[operands.src1].get_u32();
1788
let b = self.state[operands.src2].get_u32();
1789
self.state[operands.dst].set_u32(a.wrapping_mul(b));
1790
ControlFlow::Continue(())
1791
}
1792
1793
fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1794
self.xmul32_s32(dst, src1, src2.into())
1795
}
1796
1797
fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1798
let a = self.state[src1].get_i32();
1799
self.state[dst].set_i32(a.wrapping_mul(src2));
1800
ControlFlow::Continue(())
1801
}
1802
1803
fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1804
let a = self.state[operands.src1].get_u64();
1805
let b = self.state[operands.src2].get_u64();
1806
self.state[operands.dst].set_u64(a.wrapping_mul(b));
1807
ControlFlow::Continue(())
1808
}
1809
1810
fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1811
self.xmul64_s32(dst, src1, src2.into())
1812
}
1813
1814
fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1815
let a = self.state[src1].get_i64();
1816
self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1817
ControlFlow::Continue(())
1818
}
1819
1820
fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1821
let a = self.state[operands.src1].get_u32();
1822
let b = self.state[operands.src2].get_u32();
1823
self.state[operands.dst].set_u32(a.wrapping_shl(b));
1824
ControlFlow::Continue(())
1825
}
1826
1827
fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1828
let a = self.state[operands.src1].get_u32();
1829
let b = self.state[operands.src2].get_u32();
1830
self.state[operands.dst].set_u32(a.wrapping_shr(b));
1831
ControlFlow::Continue(())
1832
}
1833
1834
fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1835
let a = self.state[operands.src1].get_i32();
1836
let b = self.state[operands.src2].get_u32();
1837
self.state[operands.dst].set_i32(a.wrapping_shr(b));
1838
ControlFlow::Continue(())
1839
}
1840
1841
fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1842
let a = self.state[operands.src1].get_u64();
1843
let b = self.state[operands.src2].get_u32();
1844
self.state[operands.dst].set_u64(a.wrapping_shl(b));
1845
ControlFlow::Continue(())
1846
}
1847
1848
fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1849
let a = self.state[operands.src1].get_u64();
1850
let b = self.state[operands.src2].get_u32();
1851
self.state[operands.dst].set_u64(a.wrapping_shr(b));
1852
ControlFlow::Continue(())
1853
}
1854
1855
fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1856
let a = self.state[operands.src1].get_i64();
1857
let b = self.state[operands.src2].get_u32();
1858
self.state[operands.dst].set_i64(a.wrapping_shr(b));
1859
ControlFlow::Continue(())
1860
}
1861
1862
fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1863
let a = self.state[operands.src1].get_u32();
1864
let b = u32::from(u8::from(operands.src2));
1865
self.state[operands.dst].set_u32(a.wrapping_shl(b));
1866
ControlFlow::Continue(())
1867
}
1868
1869
fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1870
let a = self.state[operands.src1].get_u32();
1871
let b = u32::from(u8::from(operands.src2));
1872
self.state[operands.dst].set_u32(a.wrapping_shr(b));
1873
ControlFlow::Continue(())
1874
}
1875
1876
fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1877
let a = self.state[operands.src1].get_i32();
1878
let b = u32::from(u8::from(operands.src2));
1879
self.state[operands.dst].set_i32(a.wrapping_shr(b));
1880
ControlFlow::Continue(())
1881
}
1882
1883
fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1884
let a = self.state[operands.src1].get_u64();
1885
let b = u32::from(u8::from(operands.src2));
1886
self.state[operands.dst].set_u64(a.wrapping_shl(b));
1887
ControlFlow::Continue(())
1888
}
1889
1890
fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1891
let a = self.state[operands.src1].get_u64();
1892
let b = u32::from(u8::from(operands.src2));
1893
self.state[operands.dst].set_u64(a.wrapping_shr(b));
1894
ControlFlow::Continue(())
1895
}
1896
1897
fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1898
let a = self.state[operands.src1].get_i64();
1899
let b = u32::from(u8::from(operands.src2));
1900
self.state[operands.dst].set_i64(a.wrapping_shr(b));
1901
ControlFlow::Continue(())
1902
}
1903
1904
fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1905
let a = self.state[src].get_i32();
1906
self.state[dst].set_i32(a.wrapping_neg());
1907
ControlFlow::Continue(())
1908
}
1909
1910
fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1911
let a = self.state[src].get_i64();
1912
self.state[dst].set_i64(a.wrapping_neg());
1913
ControlFlow::Continue(())
1914
}
1915
1916
fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1917
let a = self.state[operands.src1].get_u64();
1918
let b = self.state[operands.src2].get_u64();
1919
self.state[operands.dst].set_u32(u32::from(a == b));
1920
ControlFlow::Continue(())
1921
}
1922
1923
fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1924
let a = self.state[operands.src1].get_u64();
1925
let b = self.state[operands.src2].get_u64();
1926
self.state[operands.dst].set_u32(u32::from(a != b));
1927
ControlFlow::Continue(())
1928
}
1929
1930
fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1931
let a = self.state[operands.src1].get_i64();
1932
let b = self.state[operands.src2].get_i64();
1933
self.state[operands.dst].set_u32(u32::from(a < b));
1934
ControlFlow::Continue(())
1935
}
1936
1937
fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1938
let a = self.state[operands.src1].get_i64();
1939
let b = self.state[operands.src2].get_i64();
1940
self.state[operands.dst].set_u32(u32::from(a <= b));
1941
ControlFlow::Continue(())
1942
}
1943
1944
fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1945
let a = self.state[operands.src1].get_u64();
1946
let b = self.state[operands.src2].get_u64();
1947
self.state[operands.dst].set_u32(u32::from(a < b));
1948
ControlFlow::Continue(())
1949
}
1950
1951
fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1952
let a = self.state[operands.src1].get_u64();
1953
let b = self.state[operands.src2].get_u64();
1954
self.state[operands.dst].set_u32(u32::from(a <= b));
1955
ControlFlow::Continue(())
1956
}
1957
1958
fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1959
let a = self.state[operands.src1].get_u32();
1960
let b = self.state[operands.src2].get_u32();
1961
self.state[operands.dst].set_u32(u32::from(a == b));
1962
ControlFlow::Continue(())
1963
}
1964
1965
fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1966
let a = self.state[operands.src1].get_u32();
1967
let b = self.state[operands.src2].get_u32();
1968
self.state[operands.dst].set_u32(u32::from(a != b));
1969
ControlFlow::Continue(())
1970
}
1971
1972
fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1973
let a = self.state[operands.src1].get_i32();
1974
let b = self.state[operands.src2].get_i32();
1975
self.state[operands.dst].set_u32(u32::from(a < b));
1976
ControlFlow::Continue(())
1977
}
1978
1979
fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1980
let a = self.state[operands.src1].get_i32();
1981
let b = self.state[operands.src2].get_i32();
1982
self.state[operands.dst].set_u32(u32::from(a <= b));
1983
ControlFlow::Continue(())
1984
}
1985
1986
fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1987
let a = self.state[operands.src1].get_u32();
1988
let b = self.state[operands.src2].get_u32();
1989
self.state[operands.dst].set_u32(u32::from(a < b));
1990
ControlFlow::Continue(())
1991
}
1992
1993
fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1994
let a = self.state[operands.src1].get_u32();
1995
let b = self.state[operands.src2].get_u32();
1996
self.state[operands.dst].set_u32(u32::from(a <= b));
1997
ControlFlow::Continue(())
1998
}
1999
2000
fn push_frame(&mut self) -> ControlFlow<Done> {
2001
self.push::<crate::PushFrame, _>(self.state.lr)?;
2002
self.push::<crate::PushFrame, _>(self.state.fp)?;
2003
self.state.fp = self.state[XReg::sp].get_ptr();
2004
ControlFlow::Continue(())
2005
}
2006
2007
#[inline]
2008
fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2009
// Decrement the stack pointer `amt` bytes plus 2 pointers more for
2010
// fp/lr.
2011
let ptr_size = size_of::<usize>();
2012
let full_amt = usize::from(amt) + 2 * ptr_size;
2013
let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
2014
self.set_sp::<crate::PushFrameSave>(new_sp)?;
2015
2016
unsafe {
2017
// Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
2018
// that order, at the top of the allocated area.
2019
self.store_ne::<_, crate::PushFrameSave>(
2020
AddrO32 {
2021
addr: XReg::sp,
2022
offset: (full_amt - 1 * ptr_size) as i32,
2023
},
2024
self.state.lr,
2025
)?;
2026
self.store_ne::<_, crate::PushFrameSave>(
2027
AddrO32 {
2028
addr: XReg::sp,
2029
offset: (full_amt - 2 * ptr_size) as i32,
2030
},
2031
self.state.fp,
2032
)?;
2033
2034
// Set `fp` to the top of our frame, where `fp` is stored.
2035
let mut offset = amt as i32;
2036
self.state.fp = self.state[XReg::sp]
2037
.get_ptr::<u8>()
2038
.byte_offset(offset as isize);
2039
2040
// Next save any registers in `regs` to the stack.
2041
for reg in regs {
2042
offset -= 8;
2043
self.store_ne::<_, crate::PushFrameSave>(
2044
AddrO32 {
2045
addr: XReg::sp,
2046
offset,
2047
},
2048
self.state[reg].get_u64(),
2049
)?;
2050
}
2051
}
2052
ControlFlow::Continue(())
2053
}
2054
2055
fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2056
// Restore all registers in `regs`, followed by the normal `pop_frame`
2057
// opcode below to restore fp/lr.
2058
unsafe {
2059
let mut offset = i32::from(amt);
2060
for reg in regs {
2061
offset -= 8;
2062
let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
2063
addr: XReg::sp,
2064
offset,
2065
})?;
2066
self.state[reg].set_u64(val);
2067
}
2068
}
2069
self.pop_frame()
2070
}
2071
2072
fn pop_frame(&mut self) -> ControlFlow<Done> {
2073
self.set_sp_unchecked(self.state.fp);
2074
let fp = self.pop();
2075
let lr = self.pop();
2076
self.state.fp = fp;
2077
self.state.lr = lr;
2078
ControlFlow::Continue(())
2079
}
2080
2081
fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
2082
let idx = self.state[idx].get_u32().min(amt - 1) as isize;
2083
// SAFETY: part of the contract of the interpreter is only dealing with
2084
// valid bytecode, so this offset should be safe.
2085
self.pc = unsafe { self.pc.offset(idx * 4) };
2086
2087
// Decode the `PcRelOffset` without tampering with `self.pc` as the
2088
// jump is relative to `self.pc`.
2089
let mut tmp = self.pc;
2090
let Ok(rel) = PcRelOffset::decode(&mut tmp);
2091
let offset = isize::try_from(i32::from(rel)).unwrap();
2092
self.pc = unsafe { self.pc.offset(offset) };
2093
ControlFlow::Continue(())
2094
}
2095
2096
fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
2097
let amt = usize::try_from(amt).unwrap();
2098
let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
2099
self.set_sp::<crate::StackAlloc32>(new_sp)?;
2100
ControlFlow::Continue(())
2101
}
2102
2103
fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
2104
let amt = usize::try_from(amt).unwrap();
2105
let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
2106
self.set_sp_unchecked(new_sp);
2107
ControlFlow::Continue(())
2108
}
2109
2110
fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2111
let src = self.state[src].get_u64() as u8;
2112
self.state[dst].set_u64(src.into());
2113
ControlFlow::Continue(())
2114
}
2115
2116
fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2117
let src = self.state[src].get_u64() as u16;
2118
self.state[dst].set_u64(src.into());
2119
ControlFlow::Continue(())
2120
}
2121
2122
fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2123
let src = self.state[src].get_u64() as u32;
2124
self.state[dst].set_u64(src.into());
2125
ControlFlow::Continue(())
2126
}
2127
2128
fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2129
let src = self.state[src].get_i64() as i8;
2130
self.state[dst].set_i64(src.into());
2131
ControlFlow::Continue(())
2132
}
2133
2134
fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2135
let src = self.state[src].get_i64() as i16;
2136
self.state[dst].set_i64(src.into());
2137
ControlFlow::Continue(())
2138
}
2139
2140
fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2141
let src = self.state[src].get_i64() as i32;
2142
self.state[dst].set_i64(src.into());
2143
ControlFlow::Continue(())
2144
}
2145
2146
fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2147
let a = self.state[operands.src1].get_i32();
2148
let b = self.state[operands.src2].get_i32();
2149
match a.checked_div(b) {
2150
Some(result) => {
2151
self.state[operands.dst].set_i32(result);
2152
ControlFlow::Continue(())
2153
}
2154
None => {
2155
let kind = if b == 0 {
2156
TrapKind::DivideByZero
2157
} else {
2158
TrapKind::IntegerOverflow
2159
};
2160
self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2161
}
2162
}
2163
}
2164
2165
fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2166
let a = self.state[operands.src1].get_i64();
2167
let b = self.state[operands.src2].get_i64();
2168
match a.checked_div(b) {
2169
Some(result) => {
2170
self.state[operands.dst].set_i64(result);
2171
ControlFlow::Continue(())
2172
}
2173
None => {
2174
let kind = if b == 0 {
2175
TrapKind::DivideByZero
2176
} else {
2177
TrapKind::IntegerOverflow
2178
};
2179
self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2180
}
2181
}
2182
}
2183
2184
fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2185
let a = self.state[operands.src1].get_u32();
2186
let b = self.state[operands.src2].get_u32();
2187
match a.checked_div(b) {
2188
Some(result) => {
2189
self.state[operands.dst].set_u32(result);
2190
ControlFlow::Continue(())
2191
}
2192
None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2193
}
2194
}
2195
2196
fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2197
let a = self.state[operands.src1].get_u64();
2198
let b = self.state[operands.src2].get_u64();
2199
match a.checked_div(b) {
2200
Some(result) => {
2201
self.state[operands.dst].set_u64(result);
2202
ControlFlow::Continue(())
2203
}
2204
None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2205
}
2206
}
2207
2208
fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2209
let a = self.state[operands.src1].get_i32();
2210
let b = self.state[operands.src2].get_i32();
2211
let result = if a == i32::MIN && b == -1 {
2212
Some(0)
2213
} else {
2214
a.checked_rem(b)
2215
};
2216
match result {
2217
Some(result) => {
2218
self.state[operands.dst].set_i32(result);
2219
ControlFlow::Continue(())
2220
}
2221
None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2222
}
2223
}
2224
2225
fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2226
let a = self.state[operands.src1].get_i64();
2227
let b = self.state[operands.src2].get_i64();
2228
let result = if a == i64::MIN && b == -1 {
2229
Some(0)
2230
} else {
2231
a.checked_rem(b)
2232
};
2233
match result {
2234
Some(result) => {
2235
self.state[operands.dst].set_i64(result);
2236
ControlFlow::Continue(())
2237
}
2238
None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2239
}
2240
}
2241
2242
fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2243
let a = self.state[operands.src1].get_u32();
2244
let b = self.state[operands.src2].get_u32();
2245
match a.checked_rem(b) {
2246
Some(result) => {
2247
self.state[operands.dst].set_u32(result);
2248
ControlFlow::Continue(())
2249
}
2250
None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2251
}
2252
}
2253
2254
fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2255
let a = self.state[operands.src1].get_u64();
2256
let b = self.state[operands.src2].get_u64();
2257
match a.checked_rem(b) {
2258
Some(result) => {
2259
self.state[operands.dst].set_u64(result);
2260
ControlFlow::Continue(())
2261
}
2262
None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2263
}
2264
}
2265
2266
fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2267
let a = self.state[operands.src1].get_u32();
2268
let b = self.state[operands.src2].get_u32();
2269
self.state[operands.dst].set_u32(a & b);
2270
ControlFlow::Continue(())
2271
}
2272
2273
fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2274
self.xband32_s32(dst, src1, src2.into())
2275
}
2276
2277
fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2278
let a = self.state[src1].get_i32();
2279
self.state[dst].set_i32(a & src2);
2280
ControlFlow::Continue(())
2281
}
2282
2283
fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2284
let a = self.state[operands.src1].get_u64();
2285
let b = self.state[operands.src2].get_u64();
2286
self.state[operands.dst].set_u64(a & b);
2287
ControlFlow::Continue(())
2288
}
2289
2290
fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2291
self.xband64_s32(dst, src1, src2.into())
2292
}
2293
2294
fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2295
let a = self.state[src1].get_i64();
2296
self.state[dst].set_i64(a & i64::from(src2));
2297
ControlFlow::Continue(())
2298
}
2299
2300
fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2301
let a = self.state[operands.src1].get_u32();
2302
let b = self.state[operands.src2].get_u32();
2303
self.state[operands.dst].set_u32(a | b);
2304
ControlFlow::Continue(())
2305
}
2306
2307
fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2308
self.xbor32_s32(dst, src1, src2.into())
2309
}
2310
2311
fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2312
let a = self.state[src1].get_i32();
2313
self.state[dst].set_i32(a | src2);
2314
ControlFlow::Continue(())
2315
}
2316
2317
fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2318
let a = self.state[operands.src1].get_u64();
2319
let b = self.state[operands.src2].get_u64();
2320
self.state[operands.dst].set_u64(a | b);
2321
ControlFlow::Continue(())
2322
}
2323
2324
fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2325
self.xbor64_s32(dst, src1, src2.into())
2326
}
2327
2328
fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2329
let a = self.state[src1].get_i64();
2330
self.state[dst].set_i64(a | i64::from(src2));
2331
ControlFlow::Continue(())
2332
}
2333
2334
fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2335
let a = self.state[operands.src1].get_u32();
2336
let b = self.state[operands.src2].get_u32();
2337
self.state[operands.dst].set_u32(a ^ b);
2338
ControlFlow::Continue(())
2339
}
2340
2341
fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2342
self.xbxor32_s32(dst, src1, src2.into())
2343
}
2344
2345
fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2346
let a = self.state[src1].get_i32();
2347
self.state[dst].set_i32(a ^ src2);
2348
ControlFlow::Continue(())
2349
}
2350
2351
fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2352
let a = self.state[operands.src1].get_u64();
2353
let b = self.state[operands.src2].get_u64();
2354
self.state[operands.dst].set_u64(a ^ b);
2355
ControlFlow::Continue(())
2356
}
2357
2358
fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2359
self.xbxor64_s32(dst, src1, src2.into())
2360
}
2361
2362
fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2363
let a = self.state[src1].get_i64();
2364
self.state[dst].set_i64(a ^ i64::from(src2));
2365
ControlFlow::Continue(())
2366
}
2367
2368
fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2369
let a = self.state[src].get_u32();
2370
self.state[dst].set_u32(!a);
2371
ControlFlow::Continue(())
2372
}
2373
2374
fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2375
let a = self.state[src].get_u64();
2376
self.state[dst].set_u64(!a);
2377
ControlFlow::Continue(())
2378
}
2379
2380
fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2381
let a = self.state[operands.src1].get_u32();
2382
let b = self.state[operands.src2].get_u32();
2383
self.state[operands.dst].set_u32(a.min(b));
2384
ControlFlow::Continue(())
2385
}
2386
2387
fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2388
let a = self.state[operands.src1].get_i32();
2389
let b = self.state[operands.src2].get_i32();
2390
self.state[operands.dst].set_i32(a.min(b));
2391
ControlFlow::Continue(())
2392
}
2393
2394
fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2395
let a = self.state[operands.src1].get_u32();
2396
let b = self.state[operands.src2].get_u32();
2397
self.state[operands.dst].set_u32(a.max(b));
2398
ControlFlow::Continue(())
2399
}
2400
2401
fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2402
let a = self.state[operands.src1].get_i32();
2403
let b = self.state[operands.src2].get_i32();
2404
self.state[operands.dst].set_i32(a.max(b));
2405
ControlFlow::Continue(())
2406
}
2407
2408
fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2409
let a = self.state[operands.src1].get_u64();
2410
let b = self.state[operands.src2].get_u64();
2411
self.state[operands.dst].set_u64(a.min(b));
2412
ControlFlow::Continue(())
2413
}
2414
2415
fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2416
let a = self.state[operands.src1].get_i64();
2417
let b = self.state[operands.src2].get_i64();
2418
self.state[operands.dst].set_i64(a.min(b));
2419
ControlFlow::Continue(())
2420
}
2421
2422
fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2423
let a = self.state[operands.src1].get_u64();
2424
let b = self.state[operands.src2].get_u64();
2425
self.state[operands.dst].set_u64(a.max(b));
2426
ControlFlow::Continue(())
2427
}
2428
2429
fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2430
let a = self.state[operands.src1].get_i64();
2431
let b = self.state[operands.src2].get_i64();
2432
self.state[operands.dst].set_i64(a.max(b));
2433
ControlFlow::Continue(())
2434
}
2435
2436
fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2437
let a = self.state[src].get_u32();
2438
self.state[dst].set_u32(a.trailing_zeros());
2439
ControlFlow::Continue(())
2440
}
2441
2442
fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2443
let a = self.state[src].get_u64();
2444
self.state[dst].set_u64(a.trailing_zeros().into());
2445
ControlFlow::Continue(())
2446
}
2447
2448
fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2449
let a = self.state[src].get_u32();
2450
self.state[dst].set_u32(a.leading_zeros());
2451
ControlFlow::Continue(())
2452
}
2453
2454
fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2455
let a = self.state[src].get_u64();
2456
self.state[dst].set_u64(a.leading_zeros().into());
2457
ControlFlow::Continue(())
2458
}
2459
2460
fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2461
let a = self.state[src].get_u32();
2462
self.state[dst].set_u32(a.count_ones());
2463
ControlFlow::Continue(())
2464
}
2465
2466
fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2467
let a = self.state[src].get_u64();
2468
self.state[dst].set_u64(a.count_ones().into());
2469
ControlFlow::Continue(())
2470
}
2471
2472
fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2473
let a = self.state[operands.src1].get_u32();
2474
let b = self.state[operands.src2].get_u32();
2475
self.state[operands.dst].set_u32(a.rotate_left(b));
2476
ControlFlow::Continue(())
2477
}
2478
2479
fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2480
let a = self.state[operands.src1].get_u64();
2481
let b = self.state[operands.src2].get_u32();
2482
self.state[operands.dst].set_u64(a.rotate_left(b));
2483
ControlFlow::Continue(())
2484
}
2485
2486
fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2487
let a = self.state[operands.src1].get_u32();
2488
let b = self.state[operands.src2].get_u32();
2489
self.state[operands.dst].set_u32(a.rotate_right(b));
2490
ControlFlow::Continue(())
2491
}
2492
2493
fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2494
let a = self.state[operands.src1].get_u64();
2495
let b = self.state[operands.src2].get_u32();
2496
self.state[operands.dst].set_u64(a.rotate_right(b));
2497
ControlFlow::Continue(())
2498
}
2499
2500
fn xselect32(
2501
&mut self,
2502
dst: XReg,
2503
cond: XReg,
2504
if_nonzero: XReg,
2505
if_zero: XReg,
2506
) -> ControlFlow<Done> {
2507
let result = if self.state[cond].get_u32() != 0 {
2508
self.state[if_nonzero].get_u32()
2509
} else {
2510
self.state[if_zero].get_u32()
2511
};
2512
self.state[dst].set_u32(result);
2513
ControlFlow::Continue(())
2514
}
2515
2516
fn xselect64(
2517
&mut self,
2518
dst: XReg,
2519
cond: XReg,
2520
if_nonzero: XReg,
2521
if_zero: XReg,
2522
) -> ControlFlow<Done> {
2523
let result = if self.state[cond].get_u32() != 0 {
2524
self.state[if_nonzero].get_u64()
2525
} else {
2526
self.state[if_zero].get_u64()
2527
};
2528
self.state[dst].set_u64(result);
2529
ControlFlow::Continue(())
2530
}
2531
2532
fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2533
let a = self.state[src].get_i32();
2534
self.state[dst].set_i32(a.wrapping_abs());
2535
ControlFlow::Continue(())
2536
}
2537
2538
fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2539
let a = self.state[src].get_i64();
2540
self.state[dst].set_i64(a.wrapping_abs());
2541
ControlFlow::Continue(())
2542
}
2543
2544
// =========================================================================
2545
// o32 addressing modes
2546
2547
fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2548
let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2549
self.state[dst].set_u32(result.into());
2550
ControlFlow::Continue(())
2551
}
2552
2553
fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2554
let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2555
self.state[dst].set_i32(result.into());
2556
ControlFlow::Continue(())
2557
}
2558
2559
fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2560
let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2561
self.state[dst].set_u32(u16::from_le(result).into());
2562
ControlFlow::Continue(())
2563
}
2564
2565
fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2566
let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2567
self.state[dst].set_i32(i16::from_le(result).into());
2568
ControlFlow::Continue(())
2569
}
2570
2571
fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2572
let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2573
self.state[dst].set_i32(i32::from_le(result));
2574
ControlFlow::Continue(())
2575
}
2576
2577
fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2578
let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2579
self.state[dst].set_i64(i64::from_le(result));
2580
ControlFlow::Continue(())
2581
}
2582
2583
fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2584
let val = self.state[val].get_u32() as u8;
2585
unsafe {
2586
self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2587
}
2588
ControlFlow::Continue(())
2589
}
2590
2591
fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2592
let val = self.state[val].get_u32() as u16;
2593
unsafe {
2594
self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2595
}
2596
ControlFlow::Continue(())
2597
}
2598
2599
fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2600
let val = self.state[val].get_u32();
2601
unsafe {
2602
self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2603
}
2604
ControlFlow::Continue(())
2605
}
2606
2607
fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2608
let val = self.state[val].get_u64();
2609
unsafe {
2610
self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2611
}
2612
ControlFlow::Continue(())
2613
}
2614
2615
// =========================================================================
2616
// g32 addressing modes
2617
2618
fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2619
let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2620
self.state[dst].set_u32(result.into());
2621
ControlFlow::Continue(())
2622
}
2623
2624
fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2625
let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2626
self.state[dst].set_i32(result.into());
2627
ControlFlow::Continue(())
2628
}
2629
2630
fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2631
let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2632
self.state[dst].set_u32(u16::from_le(result).into());
2633
ControlFlow::Continue(())
2634
}
2635
2636
fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2637
let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2638
self.state[dst].set_i32(i16::from_le(result).into());
2639
ControlFlow::Continue(())
2640
}
2641
2642
fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2643
let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2644
self.state[dst].set_i32(i32::from_le(result));
2645
ControlFlow::Continue(())
2646
}
2647
2648
fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2649
let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2650
self.state[dst].set_i64(i64::from_le(result));
2651
ControlFlow::Continue(())
2652
}
2653
2654
fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2655
let val = self.state[val].get_u32() as u8;
2656
unsafe {
2657
self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2658
}
2659
ControlFlow::Continue(())
2660
}
2661
2662
fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2663
let val = self.state[val].get_u32() as u16;
2664
unsafe {
2665
self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2666
}
2667
ControlFlow::Continue(())
2668
}
2669
2670
fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2671
let val = self.state[val].get_u32();
2672
unsafe {
2673
self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2674
}
2675
ControlFlow::Continue(())
2676
}
2677
2678
fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2679
let val = self.state[val].get_u64();
2680
unsafe {
2681
self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2682
}
2683
ControlFlow::Continue(())
2684
}
2685
2686
// =========================================================================
2687
// z addressing modes
2688
2689
fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2690
let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2691
self.state[dst].set_u32(result.into());
2692
ControlFlow::Continue(())
2693
}
2694
2695
fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2696
let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2697
self.state[dst].set_i32(result.into());
2698
ControlFlow::Continue(())
2699
}
2700
2701
fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2702
let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2703
self.state[dst].set_u32(u16::from_le(result).into());
2704
ControlFlow::Continue(())
2705
}
2706
2707
fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2708
let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2709
self.state[dst].set_i32(i16::from_le(result).into());
2710
ControlFlow::Continue(())
2711
}
2712
2713
fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2714
let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2715
self.state[dst].set_i32(i32::from_le(result));
2716
ControlFlow::Continue(())
2717
}
2718
2719
fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2720
let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2721
self.state[dst].set_i64(i64::from_le(result));
2722
ControlFlow::Continue(())
2723
}
2724
2725
fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2726
let val = self.state[val].get_u32() as u8;
2727
unsafe {
2728
self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2729
}
2730
ControlFlow::Continue(())
2731
}
2732
2733
fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2734
let val = self.state[val].get_u32() as u16;
2735
unsafe {
2736
self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2737
}
2738
ControlFlow::Continue(())
2739
}
2740
2741
fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2742
let val = self.state[val].get_u32();
2743
unsafe {
2744
self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2745
}
2746
ControlFlow::Continue(())
2747
}
2748
2749
fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2750
let val = self.state[val].get_u64();
2751
unsafe {
2752
self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2753
}
2754
ControlFlow::Continue(())
2755
}
2756
2757
// =========================================================================
2758
// g32bne addressing modes
2759
2760
fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2761
let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2762
self.state[dst].set_u32(result.into());
2763
ControlFlow::Continue(())
2764
}
2765
2766
fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2767
let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2768
self.state[dst].set_i32(result.into());
2769
ControlFlow::Continue(())
2770
}
2771
2772
fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2773
let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2774
self.state[dst].set_u32(u16::from_le(result).into());
2775
ControlFlow::Continue(())
2776
}
2777
2778
fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2779
let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2780
self.state[dst].set_i32(i16::from_le(result).into());
2781
ControlFlow::Continue(())
2782
}
2783
2784
fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2785
let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2786
self.state[dst].set_i32(i32::from_le(result));
2787
ControlFlow::Continue(())
2788
}
2789
2790
fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2791
let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2792
self.state[dst].set_i64(i64::from_le(result));
2793
ControlFlow::Continue(())
2794
}
2795
2796
fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2797
let val = self.state[val].get_u32() as u8;
2798
unsafe {
2799
self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2800
}
2801
ControlFlow::Continue(())
2802
}
2803
2804
fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2805
let val = self.state[val].get_u32() as u16;
2806
unsafe {
2807
self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2808
}
2809
ControlFlow::Continue(())
2810
}
2811
2812
fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2813
let val = self.state[val].get_u32();
2814
unsafe {
2815
self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2816
}
2817
ControlFlow::Continue(())
2818
}
2819
2820
fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2821
let val = self.state[val].get_u64();
2822
unsafe {
2823
self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2824
}
2825
ControlFlow::Continue(())
2826
}
2827
}
2828
2829
impl ExtendedOpVisitor for Interpreter<'_> {
2830
fn nop(&mut self) -> ControlFlow<Done> {
2831
ControlFlow::Continue(())
2832
}
2833
2834
fn trap(&mut self) -> ControlFlow<Done> {
2835
self.done_trap::<crate::Trap>()
2836
}
2837
2838
fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
2839
self.done_call_indirect_host(id)
2840
}
2841
2842
fn xpcadd(&mut self, dst: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
2843
let pc = self.pc_rel::<crate::Xpcadd>(offset);
2844
self.state[dst].set_ptr(pc.as_ptr());
2845
ControlFlow::Continue(())
2846
}
2847
2848
fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2849
let src = self.state[src].get_u32();
2850
self.state[dst].set_u32(src.swap_bytes());
2851
ControlFlow::Continue(())
2852
}
2853
2854
fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2855
let src = self.state[src].get_u64();
2856
self.state[dst].set_u64(src.swap_bytes());
2857
ControlFlow::Continue(())
2858
}
2859
2860
fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
2861
let a = self.state[src].get_u32();
2862
if a == 0 {
2863
self.state[dst].set_u32(0);
2864
} else {
2865
self.state[dst].set_i32(-1);
2866
}
2867
ControlFlow::Continue(())
2868
}
2869
2870
fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
2871
let a = self.state[src].get_u64();
2872
if a == 0 {
2873
self.state[dst].set_u64(0);
2874
} else {
2875
self.state[dst].set_i64(-1);
2876
}
2877
ControlFlow::Continue(())
2878
}
2879
2880
fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2881
let a = self.state[operands.src1].get_u32();
2882
let b = self.state[operands.src2].get_u32();
2883
match a.checked_add(b) {
2884
Some(c) => {
2885
self.state[operands.dst].set_u32(c);
2886
ControlFlow::Continue(())
2887
}
2888
None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2889
}
2890
}
2891
2892
fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2893
let a = self.state[operands.src1].get_u64();
2894
let b = self.state[operands.src2].get_u64();
2895
match a.checked_add(b) {
2896
Some(c) => {
2897
self.state[operands.dst].set_u64(c);
2898
ControlFlow::Continue(())
2899
}
2900
None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2901
}
2902
}
2903
2904
fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2905
let a = self.state[operands.src1].get_i64();
2906
let b = self.state[operands.src2].get_i64();
2907
let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2908
self.state[operands.dst].set_i64(result);
2909
ControlFlow::Continue(())
2910
}
2911
2912
fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2913
let a = self.state[operands.src1].get_u64();
2914
let b = self.state[operands.src2].get_u64();
2915
let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2916
self.state[operands.dst].set_u64(result);
2917
ControlFlow::Continue(())
2918
}
2919
2920
// =========================================================================
2921
// o32 addressing modes for big-endian X-registers
2922
2923
fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2924
let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
2925
self.state[dst].set_u32(u16::from_be(result).into());
2926
ControlFlow::Continue(())
2927
}
2928
2929
fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2930
let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
2931
self.state[dst].set_i32(i16::from_be(result).into());
2932
ControlFlow::Continue(())
2933
}
2934
2935
fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2936
let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
2937
self.state[dst].set_i32(i32::from_be(result));
2938
ControlFlow::Continue(())
2939
}
2940
2941
fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2942
let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
2943
self.state[dst].set_i64(i64::from_be(result));
2944
ControlFlow::Continue(())
2945
}
2946
2947
fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2948
let val = self.state[val].get_u32() as u16;
2949
unsafe {
2950
self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
2951
}
2952
ControlFlow::Continue(())
2953
}
2954
2955
fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2956
let val = self.state[val].get_u32();
2957
unsafe {
2958
self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
2959
}
2960
ControlFlow::Continue(())
2961
}
2962
2963
fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2964
let val = self.state[val].get_u64();
2965
unsafe {
2966
self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
2967
}
2968
ControlFlow::Continue(())
2969
}
2970
2971
// =========================================================================
2972
// o32 addressing modes for little-endian F-registers
2973
2974
fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2975
let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
2976
self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
2977
ControlFlow::Continue(())
2978
}
2979
2980
fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2981
let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
2982
self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
2983
ControlFlow::Continue(())
2984
}
2985
2986
fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2987
let val = self.state[src].get_f32();
2988
unsafe {
2989
self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
2990
}
2991
ControlFlow::Continue(())
2992
}
2993
2994
fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2995
let val = self.state[src].get_f64();
2996
unsafe {
2997
self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
2998
}
2999
ControlFlow::Continue(())
3000
}
3001
3002
// =========================================================================
3003
// o32 addressing modes for big-endian F-registers
3004
3005
fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
3006
let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
3007
self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
3008
ControlFlow::Continue(())
3009
}
3010
3011
fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
3012
let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
3013
self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
3014
ControlFlow::Continue(())
3015
}
3016
3017
fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3018
let val = self.state[src].get_f32();
3019
unsafe {
3020
self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
3021
}
3022
ControlFlow::Continue(())
3023
}
3024
3025
fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3026
let val = self.state[src].get_f64();
3027
unsafe {
3028
self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
3029
}
3030
ControlFlow::Continue(())
3031
}
3032
3033
// =========================================================================
3034
// z addressing modes for little-endian F-registers
3035
3036
fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3037
let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
3038
self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3039
ControlFlow::Continue(())
3040
}
3041
3042
fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3043
let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
3044
self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3045
ControlFlow::Continue(())
3046
}
3047
3048
fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3049
let val = self.state[src].get_f32();
3050
unsafe {
3051
self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
3052
}
3053
ControlFlow::Continue(())
3054
}
3055
3056
fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3057
let val = self.state[src].get_f64();
3058
unsafe {
3059
self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
3060
}
3061
ControlFlow::Continue(())
3062
}
3063
3064
// =========================================================================
3065
// g32 addressing modes for little-endian F-registers
3066
3067
fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3068
let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
3069
self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3070
ControlFlow::Continue(())
3071
}
3072
3073
fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3074
let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
3075
self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3076
ControlFlow::Continue(())
3077
}
3078
3079
fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3080
let val = self.state[src].get_f32();
3081
unsafe {
3082
self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
3083
}
3084
ControlFlow::Continue(())
3085
}
3086
3087
fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3088
let val = self.state[src].get_f64();
3089
unsafe {
3090
self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
3091
}
3092
ControlFlow::Continue(())
3093
}
3094
3095
// =========================================================================
3096
// o32 addressing modes for little-endian V-registers
3097
3098
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3099
fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
3100
let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
3101
self.state[dst].set_u128(u128::from_le(val));
3102
ControlFlow::Continue(())
3103
}
3104
3105
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3106
fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
3107
let val = self.state[src].get_u128();
3108
unsafe {
3109
self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
3110
}
3111
ControlFlow::Continue(())
3112
}
3113
3114
// =========================================================================
3115
// z addressing modes for little-endian V-registers
3116
3117
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3118
fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
3119
let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
3120
self.state[dst].set_u128(u128::from_le(val));
3121
ControlFlow::Continue(())
3122
}
3123
3124
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3125
fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
3126
let val = self.state[src].get_u128();
3127
unsafe {
3128
self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
3129
}
3130
ControlFlow::Continue(())
3131
}
3132
3133
// =========================================================================
3134
// g32 addressing modes for little-endian V-registers
3135
3136
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3137
fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
3138
let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
3139
self.state[dst].set_u128(u128::from_le(val));
3140
ControlFlow::Continue(())
3141
}
3142
3143
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3144
fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
3145
let val = self.state[src].get_u128();
3146
unsafe {
3147
self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
3148
}
3149
ControlFlow::Continue(())
3150
}
3151
3152
fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
3153
let fp = self.state.fp;
3154
self.state[dst].set_ptr(fp);
3155
ControlFlow::Continue(())
3156
}
3157
3158
fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
3159
let lr = self.state.lr;
3160
self.state[dst].set_ptr(lr);
3161
ControlFlow::Continue(())
3162
}
3163
3164
fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3165
let val = self.state[src];
3166
self.state[dst] = val;
3167
ControlFlow::Continue(())
3168
}
3169
3170
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3171
fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3172
let val = self.state[src];
3173
self.state[dst] = val;
3174
ControlFlow::Continue(())
3175
}
3176
3177
fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
3178
self.state[dst].set_f32(f32::from_bits(bits));
3179
ControlFlow::Continue(())
3180
}
3181
3182
fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
3183
self.state[dst].set_f64(f64::from_bits(bits));
3184
ControlFlow::Continue(())
3185
}
3186
3187
fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3188
let val = self.state[src].get_f32();
3189
self.state[dst].set_u32(val.to_bits());
3190
ControlFlow::Continue(())
3191
}
3192
3193
fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3194
let val = self.state[src].get_f64();
3195
self.state[dst].set_u64(val.to_bits());
3196
ControlFlow::Continue(())
3197
}
3198
3199
fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3200
let val = self.state[src].get_u32();
3201
self.state[dst].set_f32(f32::from_bits(val));
3202
ControlFlow::Continue(())
3203
}
3204
3205
fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3206
let val = self.state[src].get_u64();
3207
self.state[dst].set_f64(f64::from_bits(val));
3208
ControlFlow::Continue(())
3209
}
3210
3211
fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3212
let a = self.state[src1].get_f32();
3213
let b = self.state[src2].get_f32();
3214
self.state[dst].set_u32(u32::from(a == b));
3215
ControlFlow::Continue(())
3216
}
3217
3218
fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3219
let a = self.state[src1].get_f32();
3220
let b = self.state[src2].get_f32();
3221
self.state[dst].set_u32(u32::from(a != b));
3222
ControlFlow::Continue(())
3223
}
3224
3225
fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3226
let a = self.state[src1].get_f32();
3227
let b = self.state[src2].get_f32();
3228
self.state[dst].set_u32(u32::from(a < b));
3229
ControlFlow::Continue(())
3230
}
3231
3232
fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3233
let a = self.state[src1].get_f32();
3234
let b = self.state[src2].get_f32();
3235
self.state[dst].set_u32(u32::from(a <= b));
3236
ControlFlow::Continue(())
3237
}
3238
3239
fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3240
let a = self.state[src1].get_f64();
3241
let b = self.state[src2].get_f64();
3242
self.state[dst].set_u32(u32::from(a == b));
3243
ControlFlow::Continue(())
3244
}
3245
3246
fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3247
let a = self.state[src1].get_f64();
3248
let b = self.state[src2].get_f64();
3249
self.state[dst].set_u32(u32::from(a != b));
3250
ControlFlow::Continue(())
3251
}
3252
3253
fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3254
let a = self.state[src1].get_f64();
3255
let b = self.state[src2].get_f64();
3256
self.state[dst].set_u32(u32::from(a < b));
3257
ControlFlow::Continue(())
3258
}
3259
3260
fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3261
let a = self.state[src1].get_f64();
3262
let b = self.state[src2].get_f64();
3263
self.state[dst].set_u32(u32::from(a <= b));
3264
ControlFlow::Continue(())
3265
}
3266
3267
fn fselect32(
3268
&mut self,
3269
dst: FReg,
3270
cond: XReg,
3271
if_nonzero: FReg,
3272
if_zero: FReg,
3273
) -> ControlFlow<Done> {
3274
let result = if self.state[cond].get_u32() != 0 {
3275
self.state[if_nonzero].get_f32()
3276
} else {
3277
self.state[if_zero].get_f32()
3278
};
3279
self.state[dst].set_f32(result);
3280
ControlFlow::Continue(())
3281
}
3282
3283
fn fselect64(
3284
&mut self,
3285
dst: FReg,
3286
cond: XReg,
3287
if_nonzero: FReg,
3288
if_zero: FReg,
3289
) -> ControlFlow<Done> {
3290
let result = if self.state[cond].get_u32() != 0 {
3291
self.state[if_nonzero].get_f64()
3292
} else {
3293
self.state[if_zero].get_f64()
3294
};
3295
self.state[dst].set_f64(result);
3296
ControlFlow::Continue(())
3297
}
3298
3299
fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
let a = self.state[src].get_i32();
self.state[dst].set_f32(a as f32);
ControlFlow::Continue(())
}

fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
let a = self.state[src].get_u32();
self.state[dst].set_f32(a as f32);
ControlFlow::Continue(())
}

fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
let a = self.state[src].get_i64();
self.state[dst].set_f32(a as f32);
ControlFlow::Continue(())
}

fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
let a = self.state[src].get_u64();
self.state[dst].set_f32(a as f32);
ControlFlow::Continue(())
}

fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
let a = self.state[src].get_i32();
self.state[dst].set_f64(a as f64);
ControlFlow::Continue(())
}

fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
let a = self.state[src].get_u32();
self.state[dst].set_f64(a as f64);
ControlFlow::Continue(())
}

fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
let a = self.state[src].get_i64();
self.state[dst].set_f64(a as f64);
ControlFlow::Continue(())
}

fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
let a = self.state[src].get_u64();
self.state[dst].set_f64(a as f64);
ControlFlow::Continue(())
}

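// Checked float-to-int conversions: check_xnn_from_f32/f64 validate `a` against
// the bounds produced by f32_cvt_to_int_bounds/f64_cvt_to_int_bounds and break
// out with a trap via `?` when the value cannot be converted, so only in-range
// values reach the `as` cast below.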
fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f32();
self.check_xnn_from_f32::<crate::X32FromF32S>(a, f32_cvt_to_int_bounds(true, 32))?;
self.state[dst].set_i32(a as i32);
ControlFlow::Continue(())
}

fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f32();
self.check_xnn_from_f32::<crate::X32FromF32U>(a, f32_cvt_to_int_bounds(false, 32))?;
self.state[dst].set_u32(a as u32);
ControlFlow::Continue(())
}

fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f32();
self.check_xnn_from_f32::<crate::X64FromF32S>(a, f32_cvt_to_int_bounds(true, 64))?;
self.state[dst].set_i64(a as i64);
ControlFlow::Continue(())
}

fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f32();
self.check_xnn_from_f32::<crate::X64FromF32U>(a, f32_cvt_to_int_bounds(false, 64))?;
self.state[dst].set_u64(a as u64);
ControlFlow::Continue(())
}

fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f64();
self.check_xnn_from_f64::<crate::X32FromF64S>(a, f64_cvt_to_int_bounds(true, 32))?;
self.state[dst].set_i32(a as i32);
ControlFlow::Continue(())
}

fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f64();
self.check_xnn_from_f64::<crate::X32FromF64U>(a, f64_cvt_to_int_bounds(false, 32))?;
self.state[dst].set_u32(a as u32);
ControlFlow::Continue(())
}

fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f64();
self.check_xnn_from_f64::<crate::X64FromF64S>(a, f64_cvt_to_int_bounds(true, 64))?;
self.state[dst].set_i64(a as i64);
ControlFlow::Continue(())
}

fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f64();
self.check_xnn_from_f64::<crate::X64FromF64U>(a, f64_cvt_to_int_bounds(false, 64))?;
self.state[dst].set_u64(a as u64);
ControlFlow::Continue(())
}

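// The `_sat` variants below rely on Rust's saturating float-to-int `as` casts:
// out-of-range values clamp to the destination type's MIN/MAX and NaN becomes 0,
// which lines up with Wasm's saturating truncation (`trunc_sat`) semantics.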
fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f32();
self.state[dst].set_i32(a as i32);
ControlFlow::Continue(())
}

fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f32();
self.state[dst].set_u32(a as u32);
ControlFlow::Continue(())
}

fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f32();
self.state[dst].set_i64(a as i64);
ControlFlow::Continue(())
}

fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f32();
self.state[dst].set_u64(a as u64);
ControlFlow::Continue(())
}

fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f64();
self.state[dst].set_i32(a as i32);
ControlFlow::Continue(())
}

fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f64();
self.state[dst].set_u32(a as u32);
ControlFlow::Continue(())
}

fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f64();
self.state[dst].set_i64(a as i64);
ControlFlow::Continue(())
}

fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
let a = self.state[src].get_f64();
self.state[dst].set_u64(a as u64);
ControlFlow::Continue(())
}

fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3452
let a = self.state[src].get_f64();
3453
self.state[dst].set_f32(a as f32);
3454
ControlFlow::Continue(())
3455
}
3456
3457
fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3458
let a = self.state[src].get_f32();
3459
self.state[dst].set_f64(a.into());
3460
ControlFlow::Continue(())
3461
}
3462
3463
fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3464
let a = self.state[operands.src1].get_f32();
3465
let b = self.state[operands.src2].get_f32();
3466
self.state[operands.dst].set_f32(a.wasm_copysign(b));
3467
ControlFlow::Continue(())
3468
}
3469
3470
fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3471
let a = self.state[operands.src1].get_f64();
3472
let b = self.state[operands.src2].get_f64();
3473
self.state[operands.dst].set_f64(a.wasm_copysign(b));
3474
ControlFlow::Continue(())
3475
}
3476
3477
fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3478
let a = self.state[operands.src1].get_f32();
3479
let b = self.state[operands.src2].get_f32();
3480
self.state[operands.dst].set_f32(a + b);
3481
ControlFlow::Continue(())
3482
}
3483
3484
fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3485
let a = self.state[operands.src1].get_f32();
3486
let b = self.state[operands.src2].get_f32();
3487
self.state[operands.dst].set_f32(a - b);
3488
ControlFlow::Continue(())
3489
}
3490
3491
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3492
fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3493
let mut a = self.state[operands.src1].get_f32x4();
3494
let b = self.state[operands.src2].get_f32x4();
3495
for (a, b) in a.iter_mut().zip(b) {
3496
*a = *a - b;
3497
}
3498
self.state[operands.dst].set_f32x4(a);
3499
ControlFlow::Continue(())
3500
}
3501
3502
fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3503
let a = self.state[operands.src1].get_f32();
3504
let b = self.state[operands.src2].get_f32();
3505
self.state[operands.dst].set_f32(a * b);
3506
ControlFlow::Continue(())
3507
}
3508
3509
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3510
fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3511
let mut a = self.state[operands.src1].get_f32x4();
3512
let b = self.state[operands.src2].get_f32x4();
3513
for (a, b) in a.iter_mut().zip(b) {
3514
*a = *a * b;
3515
}
3516
self.state[operands.dst].set_f32x4(a);
3517
ControlFlow::Continue(())
3518
}
3519
3520
fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3521
let a = self.state[operands.src1].get_f32();
3522
let b = self.state[operands.src2].get_f32();
3523
self.state[operands.dst].set_f32(a / b);
3524
ControlFlow::Continue(())
3525
}
3526
3527
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3528
fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3529
let a = self.state[operands.src1].get_f32x4();
3530
let b = self.state[operands.src2].get_f32x4();
3531
let mut result = [0.0f32; 4];
3532
3533
for i in 0..4 {
3534
result[i] = a[i] / b[i];
3535
}
3536
3537
self.state[operands.dst].set_f32x4(result);
3538
ControlFlow::Continue(())
3539
}
3540
3541
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3542
fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3543
let a = self.state[operands.src1].get_f64x2();
3544
let b = self.state[operands.src2].get_f64x2();
3545
let mut result = [0.0f64; 2];
3546
3547
for i in 0..2 {
3548
result[i] = a[i] / b[i];
3549
}
3550
3551
self.state[operands.dst].set_f64x2(result);
3552
ControlFlow::Continue(())
3553
}
3554
3555
fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3556
let a = self.state[operands.src1].get_f32();
3557
let b = self.state[operands.src2].get_f32();
3558
self.state[operands.dst].set_f32(a.wasm_maximum(b));
3559
ControlFlow::Continue(())
3560
}
3561
3562
fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3563
let a = self.state[operands.src1].get_f32();
3564
let b = self.state[operands.src2].get_f32();
3565
self.state[operands.dst].set_f32(a.wasm_minimum(b));
3566
ControlFlow::Continue(())
3567
}
3568
3569
fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3570
let a = self.state[src].get_f32();
3571
self.state[dst].set_f32(a.wasm_trunc());
3572
ControlFlow::Continue(())
3573
}
3574
3575
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3576
fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3577
let mut a = self.state[src].get_f32x4();
3578
for elem in a.iter_mut() {
3579
*elem = elem.wasm_trunc();
3580
}
3581
self.state[dst].set_f32x4(a);
3582
ControlFlow::Continue(())
3583
}
3584
3585
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3586
fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3587
let mut a = self.state[src].get_f64x2();
3588
for elem in a.iter_mut() {
3589
*elem = elem.wasm_trunc();
3590
}
3591
self.state[dst].set_f64x2(a);
3592
ControlFlow::Continue(())
3593
}
3594
3595
fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3596
let a = self.state[src].get_f32();
3597
self.state[dst].set_f32(a.wasm_floor());
3598
ControlFlow::Continue(())
3599
}
3600
3601
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3602
fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3603
let mut a = self.state[src].get_f32x4();
3604
for elem in a.iter_mut() {
3605
*elem = elem.wasm_floor();
3606
}
3607
self.state[dst].set_f32x4(a);
3608
ControlFlow::Continue(())
3609
}
3610
3611
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3612
fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3613
let mut a = self.state[src].get_f64x2();
3614
for elem in a.iter_mut() {
3615
*elem = elem.wasm_floor();
3616
}
3617
self.state[dst].set_f64x2(a);
3618
ControlFlow::Continue(())
3619
}
3620
3621
fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3622
let a = self.state[src].get_f32();
3623
self.state[dst].set_f32(a.wasm_ceil());
3624
ControlFlow::Continue(())
3625
}
3626
3627
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3628
fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3629
let mut a = self.state[src].get_f32x4();
3630
for elem in a.iter_mut() {
3631
*elem = elem.wasm_ceil();
3632
}
3633
self.state[dst].set_f32x4(a);
3634
3635
ControlFlow::Continue(())
3636
}
3637
3638
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3639
fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3640
let mut a = self.state[src].get_f64x2();
3641
for elem in a.iter_mut() {
3642
*elem = elem.wasm_ceil();
3643
}
3644
self.state[dst].set_f64x2(a);
3645
3646
ControlFlow::Continue(())
3647
}
3648
3649
fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3650
let a = self.state[src].get_f32();
3651
self.state[dst].set_f32(a.wasm_nearest());
3652
ControlFlow::Continue(())
3653
}
3654
3655
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3656
fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3657
let mut a = self.state[src].get_f32x4();
3658
for elem in a.iter_mut() {
3659
*elem = elem.wasm_nearest();
3660
}
3661
self.state[dst].set_f32x4(a);
3662
ControlFlow::Continue(())
3663
}
3664
3665
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3666
fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3667
let mut a = self.state[src].get_f64x2();
3668
for elem in a.iter_mut() {
3669
*elem = elem.wasm_nearest();
3670
}
3671
self.state[dst].set_f64x2(a);
3672
ControlFlow::Continue(())
3673
}
3674
3675
fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3676
let a = self.state[src].get_f32();
3677
self.state[dst].set_f32(a.wasm_sqrt());
3678
ControlFlow::Continue(())
3679
}
3680
3681
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3682
fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3683
let mut a = self.state[src].get_f32x4();
3684
for elem in a.iter_mut() {
3685
*elem = elem.wasm_sqrt();
3686
}
3687
self.state[dst].set_f32x4(a);
3688
ControlFlow::Continue(())
3689
}
3690
3691
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3692
fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3693
let mut a = self.state[src].get_f64x2();
3694
for elem in a.iter_mut() {
3695
*elem = elem.wasm_sqrt();
3696
}
3697
self.state[dst].set_f64x2(a);
3698
ControlFlow::Continue(())
3699
}
3700
3701
fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3702
let a = self.state[src].get_f32();
3703
self.state[dst].set_f32(-a);
3704
ControlFlow::Continue(())
3705
}
3706
3707
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3708
fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3709
let mut a = self.state[src].get_f32x4();
3710
for elem in a.iter_mut() {
3711
*elem = -*elem;
3712
}
3713
self.state[dst].set_f32x4(a);
3714
ControlFlow::Continue(())
3715
}
3716
3717
fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3718
let a = self.state[src].get_f32();
3719
self.state[dst].set_f32(a.wasm_abs());
3720
ControlFlow::Continue(())
3721
}
3722
3723
fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3724
let a = self.state[operands.src1].get_f64();
3725
let b = self.state[operands.src2].get_f64();
3726
self.state[operands.dst].set_f64(a + b);
3727
ControlFlow::Continue(())
3728
}
3729
3730
fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3731
let a = self.state[operands.src1].get_f64();
3732
let b = self.state[operands.src2].get_f64();
3733
self.state[operands.dst].set_f64(a - b);
3734
ControlFlow::Continue(())
3735
}
3736
3737
fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3738
let a = self.state[operands.src1].get_f64();
3739
let b = self.state[operands.src2].get_f64();
3740
self.state[operands.dst].set_f64(a * b);
3741
ControlFlow::Continue(())
3742
}
3743
3744
fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3745
let a = self.state[operands.src1].get_f64();
3746
let b = self.state[operands.src2].get_f64();
3747
self.state[operands.dst].set_f64(a / b);
3748
ControlFlow::Continue(())
3749
}
3750
3751
fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3752
let a = self.state[operands.src1].get_f64();
3753
let b = self.state[operands.src2].get_f64();
3754
self.state[operands.dst].set_f64(a.wasm_maximum(b));
3755
ControlFlow::Continue(())
3756
}
3757
3758
fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3759
let a = self.state[operands.src1].get_f64();
3760
let b = self.state[operands.src2].get_f64();
3761
self.state[operands.dst].set_f64(a.wasm_minimum(b));
3762
ControlFlow::Continue(())
3763
}
3764
3765
fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3766
let a = self.state[src].get_f64();
3767
self.state[dst].set_f64(a.wasm_trunc());
3768
ControlFlow::Continue(())
3769
}
3770
3771
fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3772
let a = self.state[src].get_f64();
3773
self.state[dst].set_f64(a.wasm_floor());
3774
ControlFlow::Continue(())
3775
}
3776
3777
fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3778
let a = self.state[src].get_f64();
3779
self.state[dst].set_f64(a.wasm_ceil());
3780
ControlFlow::Continue(())
3781
}
3782
3783
fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3784
let a = self.state[src].get_f64();
3785
self.state[dst].set_f64(a.wasm_nearest());
3786
ControlFlow::Continue(())
3787
}
3788
3789
fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3790
let a = self.state[src].get_f64();
3791
self.state[dst].set_f64(a.wasm_sqrt());
3792
ControlFlow::Continue(())
3793
}
3794
3795
fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3796
let a = self.state[src].get_f64();
3797
self.state[dst].set_f64(-a);
3798
ControlFlow::Continue(())
3799
}
3800
3801
fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3802
let a = self.state[src].get_f64();
3803
self.state[dst].set_f64(a.wasm_abs());
3804
ControlFlow::Continue(())
3805
}
3806
3807
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3808
fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3809
let mut a = self.state[operands.src1].get_i8x16();
3810
let b = self.state[operands.src2].get_i8x16();
3811
for (a, b) in a.iter_mut().zip(b) {
3812
*a = a.wrapping_add(b);
3813
}
3814
self.state[operands.dst].set_i8x16(a);
3815
ControlFlow::Continue(())
3816
}
3817
3818
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3819
fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3820
let mut a = self.state[operands.src1].get_i16x8();
3821
let b = self.state[operands.src2].get_i16x8();
3822
for (a, b) in a.iter_mut().zip(b) {
3823
*a = a.wrapping_add(b);
3824
}
3825
self.state[operands.dst].set_i16x8(a);
3826
ControlFlow::Continue(())
3827
}
3828
3829
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3830
fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3831
let mut a = self.state[operands.src1].get_i32x4();
3832
let b = self.state[operands.src2].get_i32x4();
3833
for (a, b) in a.iter_mut().zip(b) {
3834
*a = a.wrapping_add(b);
3835
}
3836
self.state[operands.dst].set_i32x4(a);
3837
ControlFlow::Continue(())
3838
}
3839
3840
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3841
fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3842
let mut a = self.state[operands.src1].get_i64x2();
3843
let b = self.state[operands.src2].get_i64x2();
3844
for (a, b) in a.iter_mut().zip(b) {
3845
*a = a.wrapping_add(b);
3846
}
3847
self.state[operands.dst].set_i64x2(a);
3848
ControlFlow::Continue(())
3849
}
3850
3851
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3852
fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3853
let mut a = self.state[operands.src1].get_f32x4();
3854
let b = self.state[operands.src2].get_f32x4();
3855
for (a, b) in a.iter_mut().zip(b) {
3856
*a += b;
3857
}
3858
self.state[operands.dst].set_f32x4(a);
3859
ControlFlow::Continue(())
3860
}
3861
3862
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3863
fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3864
let mut a = self.state[operands.src1].get_f64x2();
3865
let b = self.state[operands.src2].get_f64x2();
3866
for (a, b) in a.iter_mut().zip(b) {
3867
*a += b;
3868
}
3869
self.state[operands.dst].set_f64x2(a);
3870
ControlFlow::Continue(())
3871
}
3872
3873
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3874
fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3875
let mut a = self.state[operands.src1].get_i8x16();
3876
let b = self.state[operands.src2].get_i8x16();
3877
for (a, b) in a.iter_mut().zip(b) {
3878
*a = (*a).saturating_add(b);
3879
}
3880
self.state[operands.dst].set_i8x16(a);
3881
ControlFlow::Continue(())
3882
}
3883
3884
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3885
fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3886
let mut a = self.state[operands.src1].get_u8x16();
3887
let b = self.state[operands.src2].get_u8x16();
3888
for (a, b) in a.iter_mut().zip(b) {
3889
*a = (*a).saturating_add(b);
3890
}
3891
self.state[operands.dst].set_u8x16(a);
3892
ControlFlow::Continue(())
3893
}
3894
3895
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3896
fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3897
let mut a = self.state[operands.src1].get_i16x8();
3898
let b = self.state[operands.src2].get_i16x8();
3899
for (a, b) in a.iter_mut().zip(b) {
3900
*a = (*a).saturating_add(b);
3901
}
3902
self.state[operands.dst].set_i16x8(a);
3903
ControlFlow::Continue(())
3904
}
3905
3906
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3907
fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3908
let mut a = self.state[operands.src1].get_u16x8();
3909
let b = self.state[operands.src2].get_u16x8();
3910
for (a, b) in a.iter_mut().zip(b) {
3911
*a = (*a).saturating_add(b);
3912
}
3913
self.state[operands.dst].set_u16x8(a);
3914
ControlFlow::Continue(())
3915
}
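
// Pairwise adds: adjacent lanes of src1 are summed (wrapping) into the low half
// of the result and adjacent lanes of src2 into the high half.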
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
let a = self.state[operands.src1].get_i16x8();
let b = self.state[operands.src2].get_i16x8();
let mut result = [0i16; 8];
let half = result.len() / 2;
for i in 0..half {
result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
}
self.state[operands.dst].set_i16x8(result);
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
let a = self.state[operands.src1].get_i32x4();
let b = self.state[operands.src2].get_i32x4();
let mut result = [0i32; 4];
result[0] = a[0].wrapping_add(a[1]);
result[1] = a[2].wrapping_add(a[3]);
result[2] = b[0].wrapping_add(b[1]);
result[3] = b[2].wrapping_add(b[3]);
self.state[operands.dst].set_i32x4(result);
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3945
fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3946
let a = self.state[operands.src1].get_i8x16();
3947
let b = self.state[operands.src2].get_u32();
3948
self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
3949
ControlFlow::Continue(())
3950
}
3951
3952
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3953
fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3954
let a = self.state[operands.src1].get_i16x8();
3955
let b = self.state[operands.src2].get_u32();
3956
self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
3957
ControlFlow::Continue(())
3958
}
3959
3960
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3961
fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3962
let a = self.state[operands.src1].get_i32x4();
3963
let b = self.state[operands.src2].get_u32();
3964
self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
3965
ControlFlow::Continue(())
3966
}
3967
3968
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3969
fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3970
let a = self.state[operands.src1].get_i64x2();
3971
let b = self.state[operands.src2].get_u32();
3972
self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
3973
ControlFlow::Continue(())
3974
}
3975
3976
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3977
fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3978
let a = self.state[operands.src1].get_i8x16();
3979
let b = self.state[operands.src2].get_u32();
3980
self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
3981
ControlFlow::Continue(())
3982
}
3983
3984
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3985
fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3986
let a = self.state[operands.src1].get_i16x8();
3987
let b = self.state[operands.src2].get_u32();
3988
self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
3989
ControlFlow::Continue(())
3990
}
3991
3992
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
3993
fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3994
let a = self.state[operands.src1].get_i32x4();
3995
let b = self.state[operands.src2].get_u32();
3996
self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
3997
ControlFlow::Continue(())
3998
}
3999
4000
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4001
fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4002
let a = self.state[operands.src1].get_i64x2();
4003
let b = self.state[operands.src2].get_u32();
4004
self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
4005
ControlFlow::Continue(())
4006
}
4007
4008
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4009
fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4010
let a = self.state[operands.src1].get_u8x16();
4011
let b = self.state[operands.src2].get_u32();
4012
self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
4013
ControlFlow::Continue(())
4014
}
4015
4016
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4017
fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4018
let a = self.state[operands.src1].get_u16x8();
4019
let b = self.state[operands.src2].get_u32();
4020
self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
4021
ControlFlow::Continue(())
4022
}
4023
4024
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4025
fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4026
let a = self.state[operands.src1].get_u32x4();
4027
let b = self.state[operands.src2].get_u32();
4028
self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
4029
ControlFlow::Continue(())
4030
}
4031
4032
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4033
fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4034
let a = self.state[operands.src1].get_u64x2();
4035
let b = self.state[operands.src2].get_u32();
4036
self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
4037
ControlFlow::Continue(())
4038
}
4039
4040
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4041
fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
4042
self.state[dst].set_u128(val);
4043
ControlFlow::Continue(())
4044
}
4045
4046
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4047
fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4048
let val = self.state[src].get_u32() as u8;
4049
self.state[dst].set_u8x16([val; 16]);
4050
ControlFlow::Continue(())
4051
}
4052
4053
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4054
fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4055
let val = self.state[src].get_u32() as u16;
4056
self.state[dst].set_u16x8([val; 8]);
4057
ControlFlow::Continue(())
4058
}
4059
4060
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4061
fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4062
let val = self.state[src].get_u32();
4063
self.state[dst].set_u32x4([val; 4]);
4064
ControlFlow::Continue(())
4065
}
4066
4067
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4068
fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4069
let val = self.state[src].get_u64();
4070
self.state[dst].set_u64x2([val; 2]);
4071
ControlFlow::Continue(())
4072
}
4073
4074
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4075
fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4076
let val = self.state[src].get_f32();
4077
self.state[dst].set_f32x4([val; 4]);
4078
ControlFlow::Continue(())
4079
}
4080
4081
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4082
fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4083
let val = self.state[src].get_f64();
4084
self.state[dst].set_f64x2([val; 2]);
4085
ControlFlow::Continue(())
4086
}
4087
4088
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4089
fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4090
let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
4091
self.state[dst].set_i16x8(val.map(|i| i.into()));
4092
ControlFlow::Continue(())
4093
}
4094
4095
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4096
fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4097
let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
4098
self.state[dst].set_u16x8(val.map(|i| i.into()));
4099
ControlFlow::Continue(())
4100
}
4101
4102
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4103
fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4104
let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
4105
self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
4106
ControlFlow::Continue(())
4107
}
4108
4109
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4110
fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4111
let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
4112
self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
4113
ControlFlow::Continue(())
4114
}
4115
4116
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4117
fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4118
let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
4119
self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
4120
ControlFlow::Continue(())
4121
}
4122
4123
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4124
fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4125
let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
4126
self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
4127
ControlFlow::Continue(())
4128
}
4129
4130
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4131
fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4132
let a = self.state[operands.src1].get_u128();
4133
let b = self.state[operands.src2].get_u128();
4134
self.state[operands.dst].set_u128(a & b);
4135
ControlFlow::Continue(())
4136
}
4137
4138
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4139
fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4140
let a = self.state[operands.src1].get_u128();
4141
let b = self.state[operands.src2].get_u128();
4142
self.state[operands.dst].set_u128(a | b);
4143
ControlFlow::Continue(())
4144
}
4145
4146
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4147
fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4148
let a = self.state[operands.src1].get_u128();
4149
let b = self.state[operands.src2].get_u128();
4150
self.state[operands.dst].set_u128(a ^ b);
4151
ControlFlow::Continue(())
4152
}
4153
4154
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4155
fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4156
let a = self.state[src].get_u128();
4157
self.state[dst].set_u128(!a);
4158
ControlFlow::Continue(())
4159
}
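
// Bitwise select: for every bit, take the bit from `x` where the mask `c` is 1
// and the bit from `y` where it is 0, i.e. (c & x) | (!c & y).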
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
let c = self.state[c].get_u128();
let x = self.state[x].get_u128();
let y = self.state[y].get_u128();
self.state[dst].set_u128((c & x) | (!c & y));
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
let a = self.state[src].get_u8x16();
let mut result = 0;
for item in a.iter().rev() {
result <<= 1;
result |= (*item >> 7) as u32;
}
self.state[dst].set_u32(result);
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
let a = self.state[src].get_u16x8();
let mut result = 0;
for item in a.iter().rev() {
result <<= 1;
result |= (*item >> 15) as u32;
}
self.state[dst].set_u32(result);
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
let a = self.state[src].get_u32x4();
let mut result = 0;
for item in a.iter().rev() {
result <<= 1;
result |= *item >> 31;
}
self.state[dst].set_u32(result);
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
let a = self.state[src].get_u64x2();
let mut result = 0;
for item in a.iter().rev() {
result <<= 1;
result |= (*item >> 63) as u32;
}
self.state[dst].set_u32(result);
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4219
fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4220
let a = self.state[src].get_u8x16();
4221
let result = a.iter().all(|a| *a != 0);
4222
self.state[dst].set_u32(u32::from(result));
4223
ControlFlow::Continue(())
4224
}
4225
4226
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4227
fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4228
let a = self.state[src].get_u16x8();
4229
let result = a.iter().all(|a| *a != 0);
4230
self.state[dst].set_u32(u32::from(result));
4231
ControlFlow::Continue(())
4232
}
4233
4234
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4235
fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4236
let a = self.state[src].get_u32x4();
4237
let result = a.iter().all(|a| *a != 0);
4238
self.state[dst].set_u32(u32::from(result));
4239
ControlFlow::Continue(())
4240
}
4241
4242
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4243
fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4244
let a = self.state[src].get_u64x2();
4245
let result = a.iter().all(|a| *a != 0);
4246
self.state[dst].set_u32(u32::from(result));
4247
ControlFlow::Continue(())
4248
}
4249
4250
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4251
fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4252
let a = self.state[src].get_u8x16();
4253
let result = a.iter().any(|a| *a != 0);
4254
self.state[dst].set_u32(u32::from(result));
4255
ControlFlow::Continue(())
4256
}
4257
4258
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4259
fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4260
let a = self.state[src].get_u16x8();
4261
let result = a.iter().any(|a| *a != 0);
4262
self.state[dst].set_u32(u32::from(result));
4263
ControlFlow::Continue(())
4264
}
4265
4266
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4267
fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4268
let a = self.state[src].get_u32x4();
4269
let result = a.iter().any(|a| *a != 0);
4270
self.state[dst].set_u32(u32::from(result));
4271
ControlFlow::Continue(())
4272
}
4273
4274
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4275
fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4276
let a = self.state[src].get_u64x2();
4277
let result = a.iter().any(|a| *a != 0);
4278
self.state[dst].set_u32(u32::from(result));
4279
ControlFlow::Continue(())
4280
}
4281
4282
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4283
fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4284
let a = self.state[src].get_i32x4();
4285
self.state[dst].set_f32x4(a.map(|i| i as f32));
4286
ControlFlow::Continue(())
4287
}
4288
4289
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4290
fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4291
let a = self.state[src].get_u32x4();
4292
self.state[dst].set_f32x4(a.map(|i| i as f32));
4293
ControlFlow::Continue(())
4294
}
4295
4296
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4297
fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4298
let a = self.state[src].get_i64x2();
4299
self.state[dst].set_f64x2(a.map(|i| i as f64));
4300
ControlFlow::Continue(())
4301
}
4302
4303
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4304
fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4305
let a = self.state[src].get_u64x2();
4306
self.state[dst].set_f64x2(a.map(|i| i as f64));
4307
ControlFlow::Continue(())
4308
}
4309
4310
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4311
fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4312
let a = self.state[src].get_f32x4();
4313
self.state[dst].set_i32x4(a.map(|f| f as i32));
4314
ControlFlow::Continue(())
4315
}
4316
4317
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4318
fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4319
let a = self.state[src].get_f32x4();
4320
self.state[dst].set_u32x4(a.map(|f| f as u32));
4321
ControlFlow::Continue(())
4322
}
4323
4324
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4325
fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4326
let a = self.state[src].get_f64x2();
4327
self.state[dst].set_i64x2(a.map(|f| f as i64));
4328
ControlFlow::Continue(())
4329
}
4330
4331
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4332
fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4333
let a = self.state[src].get_f64x2();
4334
self.state[dst].set_u64x2(a.map(|f| f as u64));
4335
ControlFlow::Continue(())
4336
}
4337
4338
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4339
fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4340
let a = *self.state[src].get_i8x16().first_chunk().unwrap();
4341
self.state[dst].set_i16x8(a.map(|i| i.into()));
4342
ControlFlow::Continue(())
4343
}
4344
4345
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4346
fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4347
let a = *self.state[src].get_u8x16().first_chunk().unwrap();
4348
self.state[dst].set_u16x8(a.map(|i| i.into()));
4349
ControlFlow::Continue(())
4350
}
4351
4352
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4353
fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4354
let a = *self.state[src].get_i16x8().first_chunk().unwrap();
4355
self.state[dst].set_i32x4(a.map(|i| i.into()));
4356
ControlFlow::Continue(())
4357
}
4358
4359
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4360
fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4361
let a = *self.state[src].get_u16x8().first_chunk().unwrap();
4362
self.state[dst].set_u32x4(a.map(|i| i.into()));
4363
ControlFlow::Continue(())
4364
}
4365
4366
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4367
fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4368
let a = *self.state[src].get_i32x4().first_chunk().unwrap();
4369
self.state[dst].set_i64x2(a.map(|i| i.into()));
4370
ControlFlow::Continue(())
4371
}
4372
4373
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4374
fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4375
let a = *self.state[src].get_u32x4().first_chunk().unwrap();
4376
self.state[dst].set_u64x2(a.map(|i| i.into()));
4377
ControlFlow::Continue(())
4378
}
4379
4380
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4381
fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4382
let a = *self.state[src].get_i8x16().last_chunk().unwrap();
4383
self.state[dst].set_i16x8(a.map(|i| i.into()));
4384
ControlFlow::Continue(())
4385
}
4386
4387
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4388
fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4389
let a = *self.state[src].get_u8x16().last_chunk().unwrap();
4390
self.state[dst].set_u16x8(a.map(|i| i.into()));
4391
ControlFlow::Continue(())
4392
}
4393
4394
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4395
fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4396
let a = *self.state[src].get_i16x8().last_chunk().unwrap();
4397
self.state[dst].set_i32x4(a.map(|i| i.into()));
4398
ControlFlow::Continue(())
4399
}
4400
4401
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4402
fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4403
let a = *self.state[src].get_u16x8().last_chunk().unwrap();
4404
self.state[dst].set_u32x4(a.map(|i| i.into()));
4405
ControlFlow::Continue(())
4406
}
4407
4408
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4409
fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4410
let a = *self.state[src].get_i32x4().last_chunk().unwrap();
4411
self.state[dst].set_i64x2(a.map(|i| i.into()));
4412
ControlFlow::Continue(())
4413
}
4414
4415
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4416
fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4417
let a = *self.state[src].get_u32x4().last_chunk().unwrap();
4418
self.state[dst].set_u64x2(a.map(|i| i.into()));
4419
ControlFlow::Continue(())
4420
}
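
// The vnarrow* ops saturate each wider lane into the narrower element type
// (clamping to the destination's MIN/MAX); src1 fills the low lanes of the
// result and src2 the high lanes.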
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
let a = self.state[operands.src1].get_i16x8();
let b = self.state[operands.src2].get_i16x8();
let mut result = [0; 16];
for (i, d) in a.iter().chain(&b).zip(&mut result) {
*d = (*i)
.try_into()
.unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
}
self.state[operands.dst].set_i8x16(result);
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
let a = self.state[operands.src1].get_i16x8();
let b = self.state[operands.src2].get_i16x8();
let mut result = [0; 16];
for (i, d) in a.iter().chain(&b).zip(&mut result) {
*d = (*i)
.try_into()
.unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
}
self.state[operands.dst].set_u8x16(result);
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4451
fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4452
let a = self.state[operands.src1].get_i32x4();
4453
let b = self.state[operands.src2].get_i32x4();
4454
let mut result = [0; 8];
4455
for (i, d) in a.iter().chain(&b).zip(&mut result) {
4456
*d = (*i)
4457
.try_into()
4458
.unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
4459
}
4460
self.state[operands.dst].set_i16x8(result);
4461
ControlFlow::Continue(())
4462
}
4463
4464
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4465
fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4466
let a = self.state[operands.src1].get_i32x4();
4467
let b = self.state[operands.src2].get_i32x4();
4468
let mut result = [0; 8];
4469
for (i, d) in a.iter().chain(&b).zip(&mut result) {
4470
*d = (*i)
4471
.try_into()
4472
.unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
4473
}
4474
self.state[operands.dst].set_u16x8(result);
4475
ControlFlow::Continue(())
4476
}
4477
4478
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4479
fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4480
let a = self.state[operands.src1].get_i64x2();
4481
let b = self.state[operands.src2].get_i64x2();
4482
let mut result = [0; 4];
4483
for (i, d) in a.iter().chain(&b).zip(&mut result) {
4484
*d = (*i)
4485
.try_into()
4486
.unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
4487
}
4488
self.state[operands.dst].set_i32x4(result);
4489
ControlFlow::Continue(())
4490
}
4491
4492
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4493
fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4494
let a = self.state[operands.src1].get_i64x2();
4495
let b = self.state[operands.src2].get_i64x2();
4496
let mut result = [0; 4];
4497
for (i, d) in a.iter().chain(&b).zip(&mut result) {
4498
*d = (*i)
4499
.try_into()
4500
.unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
4501
}
4502
self.state[operands.dst].set_u32x4(result);
4503
ControlFlow::Continue(())
4504
}
4505
4506
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4507
fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4508
let a = self.state[operands.src1].get_u64x2();
4509
let b = self.state[operands.src2].get_u64x2();
4510
let mut result = [0; 4];
4511
for (i, d) in a.iter().chain(&b).zip(&mut result) {
4512
*d = (*i).try_into().unwrap_or(u32::MAX);
4513
}
4514
self.state[operands.dst].set_u32x4(result);
4515
ControlFlow::Continue(())
4516
}
4517
4518
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4519
fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4520
let a = self.state[src].get_f32x4();
4521
self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
4522
ControlFlow::Continue(())
4523
}
4524
4525
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4526
fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4527
let a = self.state[src].get_f64x2();
4528
self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
4529
ControlFlow::Continue(())
4530
}
4531
4532
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4533
fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4534
let mut a = self.state[operands.src1].get_i8x16();
4535
let b = self.state[operands.src2].get_i8x16();
4536
for (a, b) in a.iter_mut().zip(b) {
4537
*a = a.wrapping_sub(b);
4538
}
4539
self.state[operands.dst].set_i8x16(a);
4540
ControlFlow::Continue(())
4541
}
4542
4543
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4544
fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4545
let mut a = self.state[operands.src1].get_i16x8();
4546
let b = self.state[operands.src2].get_i16x8();
4547
for (a, b) in a.iter_mut().zip(b) {
4548
*a = a.wrapping_sub(b);
4549
}
4550
self.state[operands.dst].set_i16x8(a);
4551
ControlFlow::Continue(())
4552
}
4553
4554
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4555
fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4556
let mut a = self.state[operands.src1].get_i32x4();
4557
let b = self.state[operands.src2].get_i32x4();
4558
for (a, b) in a.iter_mut().zip(b) {
4559
*a = a.wrapping_sub(b);
4560
}
4561
self.state[operands.dst].set_i32x4(a);
4562
ControlFlow::Continue(())
4563
}
4564
4565
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4566
fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4567
let mut a = self.state[operands.src1].get_i64x2();
4568
let b = self.state[operands.src2].get_i64x2();
4569
for (a, b) in a.iter_mut().zip(b) {
4570
*a = a.wrapping_sub(b);
4571
}
4572
self.state[operands.dst].set_i64x2(a);
4573
ControlFlow::Continue(())
4574
}
4575
4576
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4577
fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4578
let mut a = self.state[operands.src1].get_i8x16();
4579
let b = self.state[operands.src2].get_i8x16();
4580
for (a, b) in a.iter_mut().zip(b) {
4581
*a = a.saturating_sub(b);
4582
}
4583
self.state[operands.dst].set_i8x16(a);
4584
ControlFlow::Continue(())
4585
}
4586
4587
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4588
fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4589
let mut a = self.state[operands.src1].get_u8x16();
4590
let b = self.state[operands.src2].get_u8x16();
4591
for (a, b) in a.iter_mut().zip(b) {
4592
*a = a.saturating_sub(b);
4593
}
4594
self.state[operands.dst].set_u8x16(a);
4595
ControlFlow::Continue(())
4596
}
4597
4598
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4599
fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4600
let mut a = self.state[operands.src1].get_i16x8();
4601
let b = self.state[operands.src2].get_i16x8();
4602
for (a, b) in a.iter_mut().zip(b) {
4603
*a = a.saturating_sub(b);
4604
}
4605
self.state[operands.dst].set_i16x8(a);
4606
ControlFlow::Continue(())
4607
}
4608
4609
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4610
fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4611
let mut a = self.state[operands.src1].get_u16x8();
4612
let b = self.state[operands.src2].get_u16x8();
4613
for (a, b) in a.iter_mut().zip(b) {
4614
*a = a.saturating_sub(b);
4615
}
4616
self.state[operands.dst].set_u16x8(a);
4617
ControlFlow::Continue(())
4618
}
4619
4620
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4621
fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4622
let mut a = self.state[operands.src1].get_f64x2();
4623
let b = self.state[operands.src2].get_f64x2();
4624
for (a, b) in a.iter_mut().zip(b) {
4625
*a = *a - b;
4626
}
4627
self.state[operands.dst].set_f64x2(a);
4628
ControlFlow::Continue(())
4629
}
4630
4631
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4632
fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4633
let mut a = self.state[operands.src1].get_i8x16();
4634
let b = self.state[operands.src2].get_i8x16();
4635
for (a, b) in a.iter_mut().zip(b) {
4636
*a = a.wrapping_mul(b);
4637
}
4638
self.state[operands.dst].set_i8x16(a);
4639
ControlFlow::Continue(())
4640
}
4641
4642
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4643
fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4644
let mut a = self.state[operands.src1].get_i16x8();
4645
let b = self.state[operands.src2].get_i16x8();
4646
for (a, b) in a.iter_mut().zip(b) {
4647
*a = a.wrapping_mul(b);
4648
}
4649
self.state[operands.dst].set_i16x8(a);
4650
ControlFlow::Continue(())
4651
}
4652
4653
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4654
fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4655
let mut a = self.state[operands.src1].get_i32x4();
4656
let b = self.state[operands.src2].get_i32x4();
4657
for (a, b) in a.iter_mut().zip(b) {
4658
*a = a.wrapping_mul(b);
4659
}
4660
self.state[operands.dst].set_i32x4(a);
4661
ControlFlow::Continue(())
4662
}
4663
4664
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4665
fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4666
let mut a = self.state[operands.src1].get_i64x2();
4667
let b = self.state[operands.src2].get_i64x2();
4668
for (a, b) in a.iter_mut().zip(b) {
4669
*a = a.wrapping_mul(b);
4670
}
4671
self.state[operands.dst].set_i64x2(a);
4672
ControlFlow::Continue(())
4673
}
4674
4675
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4676
fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4677
let mut a = self.state[operands.src1].get_f64x2();
4678
let b = self.state[operands.src2].get_f64x2();
4679
for (a, b) in a.iter_mut().zip(b) {
4680
*a = *a * b;
4681
}
4682
self.state[operands.dst].set_f64x2(a);
4683
ControlFlow::Continue(())
4684
}
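
// Q15 rounding, saturating multiply (the semantics of Wasm's
// `i16x8.q15mulr_sat_s`): compute (a * b + 0x4000) >> 15 in 32-bit arithmetic,
// then clamp to the i16 range.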
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
let mut a = self.state[operands.src1].get_i16x8();
let b = self.state[operands.src2].get_i16x8();
const MIN: i32 = i16::MIN as i32;
const MAX: i32 = i16::MAX as i32;
for (a, b) in a.iter_mut().zip(b) {
let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
*a = r.clamp(MIN, MAX) as i16;
}
self.state[operands.dst].set_i16x8(a);
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
let a = self.state[src].get_u8x16();
self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
ControlFlow::Continue(())
}

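// The lane extract/insert ops below index the vector with `get_unchecked`; the
// bytecode producer is assumed to only emit lane numbers that are in range for
// the vector shape, which is what makes the unchecked access sound here.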
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
self.state[dst].set_u32(u32::from(a));
ControlFlow::Continue(())
}

#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4715
fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4716
let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
4717
self.state[dst].set_u32(u32::from(a));
4718
ControlFlow::Continue(())
4719
}
4720
4721
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4722
fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4723
let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
4724
self.state[dst].set_u32(a);
4725
ControlFlow::Continue(())
4726
}
4727
4728
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4729
fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4730
let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
4731
self.state[dst].set_u64(a);
4732
ControlFlow::Continue(())
4733
}
4734
4735
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4736
fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4737
let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
4738
self.state[dst].set_f32(a);
4739
ControlFlow::Continue(())
4740
}
4741
4742
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4743
fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4744
let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
4745
self.state[dst].set_f64(a);
4746
ControlFlow::Continue(())
4747
}
4748
4749
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4750
fn vinsertx8(
4751
&mut self,
4752
operands: BinaryOperands<VReg, VReg, XReg>,
4753
lane: u8,
4754
) -> ControlFlow<Done> {
4755
let mut a = self.state[operands.src1].get_u8x16();
4756
let b = self.state[operands.src2].get_u32() as u8;
4757
unsafe {
4758
*a.get_unchecked_mut(usize::from(lane)) = b;
4759
}
4760
self.state[operands.dst].set_u8x16(a);
4761
ControlFlow::Continue(())
4762
}
4763
4764
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4765
fn vinsertx16(
4766
&mut self,
4767
operands: BinaryOperands<VReg, VReg, XReg>,
4768
lane: u8,
4769
) -> ControlFlow<Done> {
4770
let mut a = self.state[operands.src1].get_u16x8();
4771
let b = self.state[operands.src2].get_u32() as u16;
4772
unsafe {
4773
*a.get_unchecked_mut(usize::from(lane)) = b;
4774
}
4775
self.state[operands.dst].set_u16x8(a);
4776
ControlFlow::Continue(())
4777
}
4778
4779
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4780
fn vinsertx32(
4781
&mut self,
4782
operands: BinaryOperands<VReg, VReg, XReg>,
4783
lane: u8,
4784
) -> ControlFlow<Done> {
4785
let mut a = self.state[operands.src1].get_u32x4();
4786
let b = self.state[operands.src2].get_u32();
4787
unsafe {
4788
*a.get_unchecked_mut(usize::from(lane)) = b;
4789
}
4790
self.state[operands.dst].set_u32x4(a);
4791
ControlFlow::Continue(())
4792
}
4793
4794
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4795
fn vinsertx64(
4796
&mut self,
4797
operands: BinaryOperands<VReg, VReg, XReg>,
4798
lane: u8,
4799
) -> ControlFlow<Done> {
4800
let mut a = self.state[operands.src1].get_u64x2();
4801
let b = self.state[operands.src2].get_u64();
4802
unsafe {
4803
*a.get_unchecked_mut(usize::from(lane)) = b;
4804
}
4805
self.state[operands.dst].set_u64x2(a);
4806
ControlFlow::Continue(())
4807
}
4808
4809
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4810
fn vinsertf32(
4811
&mut self,
4812
operands: BinaryOperands<VReg, VReg, FReg>,
4813
lane: u8,
4814
) -> ControlFlow<Done> {
4815
let mut a = self.state[operands.src1].get_f32x4();
4816
let b = self.state[operands.src2].get_f32();
4817
unsafe {
4818
*a.get_unchecked_mut(usize::from(lane)) = b;
4819
}
4820
self.state[operands.dst].set_f32x4(a);
4821
ControlFlow::Continue(())
4822
}
4823
4824
#[interp_disable_if_cfg(pulley_disable_interp_simd)]
4825
fn vinsertf64(
4826
&mut self,
4827
operands: BinaryOperands<VReg, VReg, FReg>,
4828
lane: u8,
4829
) -> ControlFlow<Done> {
4830
let mut a = self.state[operands.src1].get_f64x2();
4831
let b = self.state[operands.src2].get_f64();
4832
unsafe {
4833
*a.get_unchecked_mut(usize::from(lane)) = b;
4834
}
4835
self.state[operands.dst].set_f64x2(a);
4836
ControlFlow::Continue(())
4837
}
4838
4839
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

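    // Lane-wise negation. Integer lanes use wrapping negation (so `MIN`
    // negates to itself); the `f64x2` variant flips the sign of each lane.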
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i8x16();
        self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i16x8();
        self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f64x2(a.map(|i| -i));
        ControlFlow::Continue(())
    }

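    // Lane-wise minimum/maximum, with `_s` variants comparing lanes as signed
    // integers and `_u` variants as unsigned.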
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

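    // Lane-wise absolute value. Integer lanes use `wrapping_abs` (so `MIN`
    // stays `MIN`); float lanes use the Wasm `abs` semantics via `wasm_abs`.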
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i8x16();
        self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i16x8();
        self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
        ControlFlow::Continue(())
    }

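    // Lane-wise float maximum/minimum following Wasm semantics via
    // `wasm_maximum`/`wasm_minimum` (NaNs propagate rather than being
    // discarded).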
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_maximum(*b);
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_maximum(*b);
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_minimum(*b);
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_minimum(*b);
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

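    // `vshuffle` selects each destination byte via the little-endian bytes of
    // the `mask` immediate: values 0..=15 index into `src1`, 16..=31 into
    // `src2`.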
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
        let a = self.state[src1].get_u8x16();
        let b = self.state[src2].get_u8x16();
        let result = mask.to_le_bytes().map(|m| {
            if m < 16 {
                a[m as usize]
            } else {
                b[m as usize - 16]
            }
        });
        self.state[dst].set_u8x16(result);
        ControlFlow::Continue(())
    }

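    // `vswizzlei8x16` picks bytes of `src1` using the byte indices in `src2`;
    // any index outside 0..=15 produces a zero lane.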
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let src1 = self.state[operands.src1].get_i8x16();
        let src2 = self.state[operands.src2].get_i8x16();
        let mut dst = [0i8; 16];
        for (i, &idx) in src2.iter().enumerate() {
            if (idx as usize) < 16 {
                dst[i] = src1[idx as usize];
            } else {
                dst[i] = 0
            }
        }
        self.state[operands.dst].set_i8x16(dst);
        ControlFlow::Continue(())
    }

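    // Rounding-average opcodes compute `(a + b + 1) / 2` per lane in wider
    // precision so the intermediate sum cannot overflow.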
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            // use wider precision to avoid overflow
            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            // use wider precision to avoid overflow
            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

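    // Float lane comparisons mirror the integer ones above: all ones for a
    // true lane, all zeros otherwise, written to the destination as an
    // integer mask.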
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

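    // Lane-wise fused multiply-add: each destination lane is `a * b + c`
    // computed via `wasm_mul_add`.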
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut a = self.state[a].get_f32x4();
        let b = self.state[b].get_f32x4();
        let c = self.state[c].get_f32x4();
        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
            *a = a.wasm_mul_add(b, c);
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut a = self.state[a].get_f64x2();
        let b = self.state[b].get_f64x2();
        let c = self.state[c].get_f64x2();
        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
            *a = a.wasm_mul_add(b, c);
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

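    // `vselect` copies one of two whole vector registers depending on whether
    // the scalar condition register is non-zero.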
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vselect(
        &mut self,
        dst: VReg,
        cond: XReg,
        if_nonzero: VReg,
        if_zero: VReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero]
        } else {
            self.state[if_zero]
        };
        self.state[dst] = result;
        ControlFlow::Continue(())
    }

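    // 128-bit integer opcodes operate on pairs of `x` registers holding the
    // low and high 64-bit halves, combined and split via `get_i128`/
    // `set_i128`; arithmetic wraps on overflow.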
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xadd128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.get_i128(lhs_lo, lhs_hi);
        let rhs = self.get_i128(rhs_lo, rhs_hi);
        let result = lhs.wrapping_add(rhs);
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xsub128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.get_i128(lhs_lo, lhs_hi);
        let rhs = self.get_i128(rhs_lo, rhs_hi);
        let result = lhs.wrapping_sub(rhs);
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

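    // Widening multiplies produce the full 128-bit product of two 64-bit
    // operands, signed (`_s`) or unsigned (`_u`).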
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xwidemul64_s(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.state[lhs].get_i64();
        let rhs = self.state[rhs].get_i64();
        let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xwidemul64_u(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.state[lhs].get_u64();
        let rhs = self.state[rhs].get_u64();
        let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
        self.set_i128(dst_lo, dst_hi, result as i128);
        ControlFlow::Continue(())
    }
}
