GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/isa/x64/lower/isle.rs
//! ISLE integration glue code for x64 lowering.

// Pull in the ISLE generated code.
pub(crate) mod generated_code;
use crate::{ir::AtomicRmwOp, ir::types};
use generated_code::{AssemblerOutputs, Context, MInst, RegisterClass};

// Types that the generated ISLE code uses via `use super::*`.
use super::external::{CraneliftRegisters, PairedGpr, PairedXmm, isle_assembler_methods};
use super::{MergeableLoadSize, is_int_or_ref_ty, is_mergeable_load, lower_to_amode};
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::*;
use crate::ir::types::*;
use crate::ir::{
    BlockCall, Inst, InstructionData, LibCall, MemFlags, Opcode, TrapCode, Value, ValueList,
};
use crate::isa::x64::X64Backend;
use crate::isa::x64::inst::{ReturnCallInfo, args::*, regs};
use crate::isa::x64::lower::{InsnInput, emit_vm_call};
use crate::machinst::isle::*;
use crate::machinst::{
    ArgPair, CallArgList, CallInfo, CallRetList, InstOutput, MachInst, VCodeConstant,
    VCodeConstantData,
};
use alloc::boxed::Box;
use alloc::vec::Vec;
use cranelift_assembler_x64 as asm;
use regalloc2::PReg;

/// Type representing out-of-line data for calls. This type is optional because
/// the call instruction is also used by Winch to emit calls, but Winch does not
/// use the `Box<CallInfo>` field; it is only used by Cranelift. By making it
/// optional, we reduce the number of heap allocations in Winch.
type BoxCallInfo = Box<CallInfo<ExternalName>>;
type BoxCallIndInfo = Box<CallInfo<RegMem>>;
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
type BoxReturnCallIndInfo = Box<ReturnCallInfo<Reg>>;
type VecArgPair = Vec<ArgPair>;
type BoxSyntheticAmode = Box<SyntheticAmode>;

/// When interacting with the external assembler (see `external.rs`), we
/// need to fix the types we'll use.
type AssemblerInst = asm::Inst<CraneliftRegisters>;
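/// A load that lowering has determined can be merged ("sunk") into the
/// instruction that consumes its value, letting that consumer use a memory
/// operand directly instead of a separately loaded register.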
pub struct SinkableLoad {
    inst: Inst,
    addr_input: InsnInput,
    offset: i32,
}

/// The main entry point for lowering with ISLE.
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower(&mut isle_ctx, inst)
}

pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets)
}
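// The ISLE-generated code calls back into the lowering context through this
// `Context` implementation: each method below backs an extern constructor or
// extractor declared in the x64 ISLE sources.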
impl Context for IsleContext<'_, '_, MInst, X64Backend> {
    isle_lower_prelude_methods!();
    isle_assembler_methods!();

    fn gen_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
        patchable: bool,
    ) -> BoxCallInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest, uses, defs, try_call_info, patchable),
        )
    }

    fn gen_call_ind_info(
        &mut self,
        sig: Sig,
        dest: &RegMem,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> BoxCallIndInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest.clone(), uses, defs, try_call_info, false),
        )
    }

    fn gen_return_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
    ) -> BoxReturnCallInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    fn gen_return_call_ind_info(
        &mut self,
        sig: Sig,
        dest: Reg,
        uses: CallArgList,
    ) -> BoxReturnCallIndInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    #[inline]
    fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }

    #[inline]
    fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        OperandSize::from_ty(ty)
    }
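    // The `put_in_*` helpers below lower a `Value` into the most flexible
    // operand form available: a 32-bit immediate for small constants, then a
    // constant-pool or sinkable-load memory operand, and finally a register.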
    fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                };
            }
        }

        self.put_in_reg_mem(val).into()
    }

    fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return XmmMemImm::unwrap_new(RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                });
            }
        }

        let res = match self.put_in_xmm_mem(val).to_reg_mem() {
            RegMem::Reg { reg } => RegMemImm::Reg { reg },
            RegMem::Mem { addr } => RegMemImm::Mem { addr },
        };

        XmmMemImm::unwrap_new(res)
    }

    fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a rematerialization into a register,
            // because it reduces register pressure.
            //
            // NOTE: this is where behavior differs from `put_in_reg_mem`, as we always force
            // constants to be 16 bytes when a constant will be used in place of an xmm register.
            let vcode_constant = self.emit_u128_le_const(c as u128);
            return XmmMem::unwrap_new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)));
        }

        XmmMem::unwrap_new(self.put_in_reg_mem(val))
    }

    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a
            // rematerialization into a register, because it reduces
            // register pressure.
            let vcode_constant = self.emit_u64_le_const(c);
            return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
        }

        if let Some(load) = self.sinkable_load(val) {
            return RegMem::Mem {
                addr: self.sink_load(&load),
            };
        }

        RegMem::reg(self.put_in_reg(val))
    }
    #[inline]
    fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn has_avx(&mut self) -> bool {
        self.backend.x64_flags.has_avx()
    }

    #[inline]
    fn use_avx2(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_avx2()
    }

    #[inline]
    fn has_avx512vl(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vl()
    }

    #[inline]
    fn has_avx512dq(&mut self) -> bool {
        self.backend.x64_flags.has_avx512dq()
    }

    #[inline]
    fn has_avx512f(&mut self) -> bool {
        self.backend.x64_flags.has_avx512f()
    }

    #[inline]
    fn has_avx512bitalg(&mut self) -> bool {
        self.backend.x64_flags.has_avx512bitalg()
    }

    #[inline]
    fn has_avx512vbmi(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vbmi()
    }

    #[inline]
    fn has_lzcnt(&mut self) -> bool {
        self.backend.x64_flags.has_lzcnt()
    }

    #[inline]
    fn has_bmi1(&mut self) -> bool {
        self.backend.x64_flags.has_bmi1()
    }

    #[inline]
    fn has_bmi2(&mut self) -> bool {
        self.backend.x64_flags.has_bmi2()
    }

    #[inline]
    fn use_popcnt(&mut self) -> bool {
        self.backend.x64_flags.has_popcnt() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn use_fma(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_fma()
    }

    #[inline]
    fn has_sse3(&mut self) -> bool {
        self.backend.x64_flags.has_sse3()
    }

    #[inline]
    fn has_ssse3(&mut self) -> bool {
        self.backend.x64_flags.has_ssse3()
    }

    #[inline]
    fn has_sse41(&mut self) -> bool {
        self.backend.x64_flags.has_sse41()
    }

    #[inline]
    fn use_sse42(&mut self) -> bool {
        self.backend.x64_flags.has_sse41() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn has_cmpxchg16b(&mut self) -> bool {
        self.backend.x64_flags.has_cmpxchg16b()
    }
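    // Shift amounts follow CLIF semantics and are masked to the lane width:
    // e.g. for I32 lanes the mask is 31, so a shift amount of 35 lowers to 3.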
    #[inline]
    fn shift_mask(&mut self, ty: Type) -> u8 {
        debug_assert!(ty.lane_bits().is_power_of_two());

        (ty.lane_bits() - 1) as u8
    }

    fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u8 {
        (val.bits() as u8) & self.shift_mask(ty)
    }

    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
        let imm = self.i64_from_iconst(val)?;
        Some(GprMemImm::unwrap_new(RegMemImm::Imm {
            simm32: i32::try_from(imm).ok()?.cast_unsigned(),
        }))
    }
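    // The two sinkable-load extractors differ only in the `MergeableLoadSize`
    // policy they pass: `Min32` here versus `Exact` below, which requires the
    // loaded width to match the consuming operation exactly.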
    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        SyntheticAmode::Real(addr)
    }

    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }

    fn emit(&mut self, inst: &MInst) -> Unit {
        self.lower_ctx.emit(inst.clone());
    }

    #[inline]
    fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
        // Insert 32 bits from the replacement (source index 00, in bits 7:6 of
        // the immediate) into the vector at `lane` (shifted into bits 5:4).
        0b00_00_00_00 | lane << 4
    }

    #[inline]
    fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
        RegMem::mem(addr.clone())
    }

    #[inline]
    fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
        amode.clone().into()
    }

    #[inline]
    fn synthetic_amode_slot(&mut self, offset: i32) -> SyntheticAmode {
        SyntheticAmode::SlotOffset { simm32: offset }
    }

    #[inline]
    fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
        SyntheticAmode::ConstantOffset(c)
    }

    #[inline]
    fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
        r.to_writable_reg()
    }

    #[inline]
    fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
        r.to_writable_reg()
    }
    fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    #[inline]
    fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
        Writable::from_reg(Xmm::unwrap_new(r.to_reg()))
    }

    #[inline]
    fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
        r.to_reg()
    }

    #[inline]
    fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
        r.to_reg()
    }

    #[inline]
    fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
        XmmMemImm::unwrap_new(r.clone().to_reg_mem().into())
    }

    #[inline]
    fn temp_writable_gpr(&mut self) -> WritableGpr {
        self.lower_ctx.temp_writable_gpr()
    }

    #[inline]
    fn temp_writable_xmm(&mut self) -> WritableXmm {
        self.lower_ctx.temp_writable_xmm()
    }

    #[inline]
    fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
        RegMemImm::Reg { reg }
    }

    #[inline]
    fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
        XmmMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
        GprMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
        XmmMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
        xm.clone().into()
    }

    #[inline]
    fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
        gm.clone().into()
    }
    #[inline]
    fn xmm_new(&mut self, r: Reg) -> Xmm {
        Xmm::unwrap_new(r)
    }

    #[inline]
    fn gpr_new(&mut self, r: Reg) -> Gpr {
        Gpr::unwrap_new(r)
    }

    #[inline]
    fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
        GprMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
        GprMem::unwrap_new(RegMem::reg(r))
    }

    #[inline]
    fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
        GprMem::from(gpr)
    }

    #[inline]
    fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
        GprMemImm::from(gpr)
    }

    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
        } else if ty.is_float() || (ty.is_vector() && ty.bits() <= 128) {
            Some(RegisterClass::Xmm)
        } else {
            None
        }
    }

    #[inline]
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 => Some(()),
            _ => None,
        }
    }

    #[inline]
    fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
        CC::from_intcc(*intcc)
    }

    #[inline]
    fn cc_invert(&mut self, cc: &CC) -> CC {
        cc.invert()
    }

    #[inline]
    fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
        match cc {
            CC::Z => Some(*cc),
            CC::NZ => Some(*cc),
            _ => None,
        }
    }

    #[inline]
    fn sum_extend_fits_in_32_bits(
        &mut self,
        extend_from_ty: Type,
        constant_value: Imm64,
        offset: Offset32,
    ) -> Option<u32> {
        let offset: i64 = offset.into();
        let constant_value: u64 = constant_value.bits() as u64;
        // If necessary, zero extend `constant_value` up to 64 bits.
        let shift = 64 - extend_from_ty.bits();
        let zero_extended_constant_value = (constant_value << shift) >> shift;
        // Sum up the two operands.
        let sum = offset.wrapping_add(zero_extended_constant_value as i64);
        // Check that the sum will fit in 32-bits.
        if sum == ((sum << 32) >> 32) {
            Some(sum as u32)
        } else {
            None
        }
    }

    #[inline]
    fn amode_offset(&mut self, addr: &SyntheticAmode, offset: i32) -> SyntheticAmode {
        addr.offset(offset)
    }

    #[inline]
    fn zero_offset(&mut self) -> Offset32 {
        Offset32::new(0)
    }

    #[inline]
    fn preg_rbp(&mut self) -> PReg {
        regs::rbp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_rsp(&mut self) -> PReg {
        regs::rsp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_pinned(&mut self) -> PReg {
        regs::pinned_reg().to_real_reg().unwrap().into()
    }
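    // The `libcall_*` helpers emit an out-of-line call to a Cranelift
    // `LibCall` and return its single result register; the numeric suffix is
    // the number of register arguments passed.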
    fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_2(&mut self, libcall: &LibCall, a: Reg, b: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b), ValueRegs::one(c)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }
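    // Succeeds only when every byte of the vector constant is either 0x00 or
    // 0xFF, i.e. each lane is all-zeros or all-ones regardless of lane width.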
    #[inline]
    fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
        let const_data = self.lower_ctx.get_constant_data(constant);
        if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
            return Some(());
        }
        None
    }
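    // The shuffle mask helpers below build `pshufb`-style control vectors:
    // each output byte names the input lane to copy, and a byte with the high
    // bit set (0b10000000) zeroes that output lane.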
    #[inline]
    fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| b.wrapping_sub(16))
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn perm_from_mask_with_zeros(
        &mut self,
        mask: &VecMask,
    ) -> Option<(VCodeConstant, VCodeConstant)> {
        if !mask.iter().any(|&b| b > 31) {
            return None;
        }

        let zeros = mask
            .iter()
            .map(|&b| if b > 31 { 0x00 } else { 0xff })
            .collect();

        Some((
            self.perm_from_mask(mask),
            self.lower_ctx
                .use_constant(VCodeConstantData::Generated(zeros)),
        ))
    }

    #[inline]
    fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask.iter().cloned().collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
        match XmmMemAlignedImm::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }
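    // The `pshufd`/`shufps` immediates below pack four 2-bit lane selectors
    // into one byte: `a` occupies bits 1:0, `b` bits 3:2, `c` bits 5:4 and
    // `d` bits 7:6.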
    fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        // When selecting from the right-hand side, subtract 4 from each
        // selector, bailing out if any selector is less than 4. Afterwards the
        // check is the same as `pshufd_lhs_imm` above.
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
        // The `shufps` instruction selects the first two elements from the
        // first vector and the second two elements from the second vector, so
        // offset the third/fourth selectors by 4 and then make sure each
        // adjusted selector is in range (less than 4).
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
        // This is almost the same as `shufps_imm` except that the elements
        // which are subtracted are reversed. This handles the case where the
        // `shufps` instruction can be emitted if the order of the operands is
        // swapped.
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `shufps` except this operates over 16-bit values, so four
        // of them must be fixed and the other four must be in range to encode
        // in the immediate.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }
    fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(8)?;
        let f = f.checked_sub(8)?;
        let g = g.checked_sub(8)?;
        let h = h.checked_sub(8)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `pshuflw` except that the first four operands must be
        // fixed and the second four are offset by an extra 4 and tested to
        // make sure they're all in the range [4, 8).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let e = e.checked_sub(4)?;
        let f = f.checked_sub(4)?;
        let g = g.checked_sub(4)?;
        let h = h.checked_sub(4)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Note that everything here is offset by at least 8 and the upper
        // bits are offset by 12 to test they're in the range of [12, 16).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(12)?;
        let f = f.checked_sub(12)?;
        let g = g.checked_sub(12)?;
        let h = h.checked_sub(12)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();

        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
            Some(bytes[0])
        } else {
            None
        }
    }

    fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
        // First make sure that the shuffle immediate is selecting 16-bit lanes.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;

        // Next build up an 8-bit mask from each of the bits of the selected
        // lanes above. This instruction can only be used when each lane
        // selector chooses from the corresponding lane in either of the two
        // operands, meaning the Nth lane selection must satisfy `lane % 8 ==
        // N`.
        //
        // This helper closure is used to calculate the value of the
        // corresponding bit.
        let bit = |x: u8, c: u8| {
            if x % 8 == c {
                if x < 8 { Some(0) } else { Some(1 << c) }
            } else {
                None
            }
        };
        Some(
            bit(a, 0)?
                | bit(b, 1)?
                | bit(c, 2)?
                | bit(d, 3)?
                | bit(e, 4)?
                | bit(f, 5)?
                | bit(g, 6)?
                | bit(h, 7)?,
        )
    }

    fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
        XmmMemImm::unwrap_new(RegMemImm::imm(imm))
    }
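    // Builds a 128-bit all-ones constant with a single zeroed byte at
    // `hole_idx`; as the name suggests, it is intended as a mask for
    // inserting one `i8x16` lane.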
    fn insert_i8x16_lane_hole(&mut self, hole_idx: u8) -> VCodeConstant {
        let mask = -1i128 as u128;
        self.emit_u128_le_const(mask ^ (0xff << (hole_idx * 8)))
    }

    fn writable_invalid_gpr(&mut self) -> WritableGpr {
        let reg = Gpr::new(self.invalid_reg()).unwrap();
        WritableGpr::from_reg(reg)
    }

    fn box_synthetic_amode(&mut self, amode: &SyntheticAmode) -> BoxSyntheticAmode {
        Box::new(amode.clone())
    }

    ////////////////////////////////////////////////////////////////////////////
    ///// External assembler methods.
    ////////////////////////////////////////////////////////////////////////////
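    // The `is_*` helpers below are extractors: they succeed only when the
    // operand is already in the requested shape (an immediate of the right
    // width, a register, or a memory operand) and never emit code to coerce it.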
    fn is_imm8(&mut self, src: &GprMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_imm8_xmm(&mut self, src: &XmmMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm8(&mut self, src: &GprMemImm) -> Option<i8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i8::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm16(&mut self, src: &GprMemImm) -> Option<u16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i16::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm16(&mut self, src: &GprMemImm) -> Option<i16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i16::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm32(&mut self, src: &GprMemImm) -> Option<u32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32),
            _ => None,
        }
    }

    fn is_simm32(&mut self, src: &GprMemImm) -> Option<i32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32 as i32),
            _ => None,
        }
    }

    fn is_gpr(&mut self, src: &GprMemImm) -> Option<Gpr> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => Gpr::new(reg),
            _ => None,
        }
    }

    fn is_xmm(&mut self, src: &XmmMem) -> Option<Xmm> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { reg } => Xmm::new(reg),
            _ => None,
        }
    }

    fn is_gpr_mem(&mut self, src: &GprMemImm) -> Option<GprMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => GprMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => GprMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_xmm_mem(&mut self, src: &XmmMemImm) -> Option<XmmMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => XmmMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => XmmMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_mem(&mut self, src: &XmmMem) -> Option<SyntheticAmode> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { .. } => None,
            RegMem::Mem { addr } => Some(addr),
        }
    }

    // Custom constructors for `mulx` which compute only the high half of the
    // result, so the same output operand is used for both destination
    // registers. This is in contrast to the assembler-generated version of
    // this instruction, which allocates two distinct temporary output
    // registers and computes both the high and low halves of the result.

    fn x64_mulxl_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxl_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }

    fn x64_mulxq_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxq_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }
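    // When `val` has exactly one bit set (i.e. is a power of two), returns the
    // index of that bit; otherwise returns `None`.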
    fn bt_imm(&mut self, val: u64) -> Option<u8> {
        if val.count_ones() == 1 {
            Some(u8::try_from(val.trailing_zeros()).unwrap())
        } else {
            None
        }
    }
}

impl IsleContext<'_, '_, MInst, X64Backend> {
    fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
        let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        self.lower_ctx.emit(MInst::External {
            inst: asm::inst::movdqu_a::new(
                Writable::from_reg(Xmm::unwrap_new(tmp.to_reg())),
                asm::XmmMem::Mem(addr.into()),
            )
            .into(),
        });
        Xmm::unwrap_new(tmp.to_reg())
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_to_assembler_read_write_gpr(&mut self, read: Gpr) -> asm::Gpr<PairedGpr> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Gpr::new(PairedGpr { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_to_assembler_fixed_read_write_gpr<const E: u8>(
        &mut self,
        read: Gpr,
    ) -> asm::Fixed<PairedGpr, E> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Fixed(PairedGpr { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_to_assembler_read_write_xmm(&mut self, read: Xmm) -> asm::Xmm<PairedXmm> {
        let write = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        let write = WritableXmm::from_writable_reg(write).unwrap();
        asm::Xmm::new(PairedXmm { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_read_gpr_mem(&self, read: &GprMem) -> asm::GprMem<Gpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_read_xmm_mem_aligned(
        &self,
        read: &XmmMemAligned,
    ) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_read_xmm_mem(&self, read: &XmmMem) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_write_xmm_mem(
        &self,
        write: &XmmMem,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_write_xmm_mem_aligned(
        &self,
        write: &XmmMemAligned,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_read_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<PairedGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(
                *self
                    .convert_gpr_to_assembler_read_write_gpr(Gpr::new(reg).unwrap())
                    .as_ref(),
            ),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<WritableGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(WritableGpr::from_reg(Gpr::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_amode_to_assembler_amode(&mut self, amode: &SyntheticAmode) -> asm::Amode<Gpr> {
        amode.clone().into()
    }
}

// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
// need to fix up the bits that migrate from one half of the lane to the
// other. Each 16-byte mask is indexed by the shift amount: e.g. if we shift
// right by 0 (no movement), we want to retain all the bits so we mask with
// `0xff`; if we shift right by 1, we want to retain all bits except the MSB so
// we mask with `0x7f`; etc.
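// For example, an i8x16 left shift by 3 pairs a 16x8 shift with the
// `I8X16_ISHL_MASKS` row at offset `3 * 16` (all `0xf8`), clearing the three
// low bits of each byte, which is where bits from the neighboring byte leak in.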
#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_ISHL_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
    0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
    0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
    0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
    0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];

#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_USHR_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];