GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/isa/x64/lower/isle.rs
1
//! ISLE integration glue code for x64 lowering.
2
3
// Pull in the ISLE generated code.
4
pub(crate) mod generated_code;
5
use crate::{ir::AtomicRmwOp, ir::types};
6
use generated_code::{AssemblerOutputs, Context, MInst, RegisterClass};
7
8
// Types that the generated ISLE code uses via `use super::*`.
9
use super::external::{CraneliftRegisters, PairedGpr, PairedXmm, isle_assembler_methods};
10
use super::{MergeableLoadSize, is_int_or_ref_ty, is_mergeable_load, lower_to_amode};
11
use crate::ir::condcodes::{FloatCC, IntCC};
12
use crate::ir::immediates::*;
13
use crate::ir::types::*;
14
use crate::ir::{
15
BlockCall, Inst, InstructionData, LibCall, MemFlags, Opcode, TrapCode, Value, ValueList,
16
};
17
use crate::isa::x64::X64Backend;
18
use crate::isa::x64::inst::{ReturnCallInfo, args::*, regs};
19
use crate::isa::x64::lower::{InsnInput, emit_vm_call};
20
use crate::machinst::isle::*;
21
use crate::machinst::{
22
ArgPair, CallArgList, CallInfo, CallRetList, InstOutput, MachInst, VCodeConstant,
23
VCodeConstantData,
24
};
25
use alloc::vec::Vec;
26
use cranelift_assembler_x64 as asm;
27
use regalloc2::PReg;
28
use std::boxed::Box;
29
30
/// Type representing out-of-line data for calls. This type is optional because the
31
/// call instruction is also used by Winch to emit calls, but the
32
/// `Box<CallInfo>` field is not used there; it's only used by Cranelift. By making it
33
/// optional, we reduce the number of heap allocations in Winch.
34
type BoxCallInfo = Box<CallInfo<ExternalName>>;
35
type BoxCallIndInfo = Box<CallInfo<RegMem>>;
36
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
37
type BoxReturnCallIndInfo = Box<ReturnCallInfo<Reg>>;
38
type VecArgPair = Vec<ArgPair>;
39
type BoxSyntheticAmode = Box<SyntheticAmode>;
40
41
/// When interacting with the external assembler (see `external.rs`), we
42
/// need to fix the types we'll use.
43
type AssemblerInst = asm::Inst<CraneliftRegisters>;
44
45
pub struct SinkableLoad {
46
inst: Inst,
47
addr_input: InsnInput,
48
offset: i32,
49
}
50
51
/// The main entry point for lowering with ISLE.
52
pub(crate) fn lower(
53
lower_ctx: &mut Lower<MInst>,
54
backend: &X64Backend,
55
inst: Inst,
56
) -> Option<InstOutput> {
57
// TODO: reuse the ISLE context across lowerings so we can reuse its
58
// internal heap allocations.
59
let mut isle_ctx = IsleContext { lower_ctx, backend };
60
generated_code::constructor_lower(&mut isle_ctx, inst)
61
}
62
63
pub(crate) fn lower_branch(
64
lower_ctx: &mut Lower<MInst>,
65
backend: &X64Backend,
66
branch: Inst,
67
targets: &[MachLabel],
68
) -> Option<()> {
69
// TODO: reuse the ISLE context across lowerings so we can reuse its
70
// internal heap allocations.
71
let mut isle_ctx = IsleContext { lower_ctx, backend };
72
generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets)
73
}
74
75
impl Context for IsleContext<'_, '_, MInst, X64Backend> {
76
isle_lower_prelude_methods!();
77
isle_assembler_methods!();
78
79
fn gen_call_info(
80
&mut self,
81
sig: Sig,
82
dest: ExternalName,
83
uses: CallArgList,
84
defs: CallRetList,
85
try_call_info: Option<TryCallInfo>,
86
) -> BoxCallInfo {
87
let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
88
let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
89
self.lower_ctx
90
.abi_mut()
91
.accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);
92
93
Box::new(
94
self.lower_ctx
95
.gen_call_info(sig, dest, uses, defs, try_call_info),
96
)
97
}
98
99
fn gen_call_ind_info(
100
&mut self,
101
sig: Sig,
102
dest: &RegMem,
103
uses: CallArgList,
104
defs: CallRetList,
105
try_call_info: Option<TryCallInfo>,
106
) -> BoxCallIndInfo {
107
let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
108
let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
109
self.lower_ctx
110
.abi_mut()
111
.accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);
112
113
Box::new(
114
self.lower_ctx
115
.gen_call_info(sig, dest.clone(), uses, defs, try_call_info),
116
)
117
}
118
119
fn gen_return_call_info(
120
&mut self,
121
sig: Sig,
122
dest: ExternalName,
123
uses: CallArgList,
124
) -> BoxReturnCallInfo {
125
let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
126
self.lower_ctx
127
.abi_mut()
128
.accumulate_tail_args_size(new_stack_arg_size);
129
130
Box::new(ReturnCallInfo {
131
dest,
132
uses,
133
tmp: self.lower_ctx.temp_writable_gpr(),
134
new_stack_arg_size,
135
})
136
}
137
138
fn gen_return_call_ind_info(
139
&mut self,
140
sig: Sig,
141
dest: Reg,
142
uses: CallArgList,
143
) -> BoxReturnCallIndInfo {
144
let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
145
self.lower_ctx
146
.abi_mut()
147
.accumulate_tail_args_size(new_stack_arg_size);
148
149
Box::new(ReturnCallInfo {
150
dest,
151
uses,
152
tmp: self.lower_ctx.temp_writable_gpr(),
153
new_stack_arg_size,
154
})
155
}
156
157
#[inline]
158
fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
159
if ty.bits() == 64 {
160
OperandSize::Size64
161
} else {
162
OperandSize::Size32
163
}
164
}
165
166
#[inline]
167
fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
168
OperandSize::from_ty(ty)
169
}
170
171
fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
172
if let Some(imm) = self.i64_from_iconst(val) {
173
if let Ok(imm) = i32::try_from(imm) {
174
return RegMemImm::Imm {
175
simm32: imm.cast_unsigned(),
176
};
177
}
178
}
179
180
self.put_in_reg_mem(val).into()
181
}
182
183
fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
184
if let Some(imm) = self.i64_from_iconst(val) {
185
if let Ok(imm) = i32::try_from(imm) {
186
return XmmMemImm::unwrap_new(RegMemImm::Imm {
187
simm32: imm.cast_unsigned(),
188
});
189
}
190
}
191
192
let res = match self.put_in_xmm_mem(val).to_reg_mem() {
193
RegMem::Reg { reg } => RegMemImm::Reg { reg },
194
RegMem::Mem { addr } => RegMemImm::Mem { addr },
195
};
196
197
XmmMemImm::unwrap_new(res)
198
}
199
200
fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
201
let inputs = self.lower_ctx.get_value_as_source_or_const(val);
202
203
if let Some(c) = inputs.constant {
204
// A load from the constant pool is better than a rematerialization into a register,
205
// because it reduces register pressure.
206
//
207
// NOTE: this is where behavior differs from `put_in_reg_mem`, as we always force
208
// constants to be 16 bytes when a constant will be used in place of an xmm register.
209
let vcode_constant = self.emit_u128_le_const(c as u128);
210
return XmmMem::unwrap_new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)));
211
}
212
213
XmmMem::unwrap_new(self.put_in_reg_mem(val))
214
}
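// Illustrative note (not upstream text): for a value defined by an `iconst`, this
// path widens the constant to a 16-byte pool entry via `emit_u128_le_const`, whereas
// `put_in_reg_mem` below emits only 8 bytes via `emit_u64_le_const`; the wider entry
// keeps a 16-byte xmm load from reading past the end of the constant.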
215
216
fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
217
let inputs = self.lower_ctx.get_value_as_source_or_const(val);
218
219
if let Some(c) = inputs.constant {
220
// A load from the constant pool is better than a
221
// rematerialization into a register, because it reduces
222
// register pressure.
223
let vcode_constant = self.emit_u64_le_const(c);
224
return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
225
}
226
227
if let Some(load) = self.sinkable_load(val) {
228
return RegMem::Mem {
229
addr: self.sink_load(&load),
230
};
231
}
232
233
RegMem::reg(self.put_in_reg(val))
234
}
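// Illustrative note (not upstream text): when `val` is produced by a mergeable load
// with no other uses, `sinkable_load` fires and the load is folded into the consumer
// as a memory operand rather than first being materialized in a register.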
235
236
#[inline]
237
fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
238
imm.encode()
239
}
240
241
#[inline]
242
fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
243
imm.encode()
244
}
245
246
#[inline]
247
fn use_avx(&mut self) -> bool {
248
self.backend.x64_flags.use_avx()
249
}
250
251
#[inline]
252
fn use_avx2(&mut self) -> bool {
253
self.backend.x64_flags.use_avx2()
254
}
255
256
#[inline]
257
fn use_avx512vl(&mut self) -> bool {
258
self.backend.x64_flags.use_avx512vl()
259
}
260
261
#[inline]
262
fn use_avx512dq(&mut self) -> bool {
263
self.backend.x64_flags.use_avx512dq()
264
}
265
266
#[inline]
267
fn use_avx512f(&mut self) -> bool {
268
self.backend.x64_flags.use_avx512f()
269
}
270
271
#[inline]
272
fn use_avx512bitalg(&mut self) -> bool {
273
self.backend.x64_flags.use_avx512bitalg()
274
}
275
276
#[inline]
277
fn use_avx512vbmi(&mut self) -> bool {
278
self.backend.x64_flags.use_avx512vbmi()
279
}
280
281
#[inline]
282
fn use_lzcnt(&mut self) -> bool {
283
self.backend.x64_flags.use_lzcnt()
284
}
285
286
#[inline]
287
fn use_bmi1(&mut self) -> bool {
288
self.backend.x64_flags.use_bmi1()
289
}
290
291
#[inline]
292
fn use_bmi2(&mut self) -> bool {
293
self.backend.x64_flags.use_bmi2()
294
}
295
296
#[inline]
297
fn use_popcnt(&mut self) -> bool {
298
self.backend.x64_flags.use_popcnt()
299
}
300
301
#[inline]
302
fn use_fma(&mut self) -> bool {
303
self.backend.x64_flags.use_fma()
304
}
305
306
#[inline]
307
fn use_sse3(&mut self) -> bool {
308
self.backend.x64_flags.use_sse3()
309
}
310
311
#[inline]
312
fn use_ssse3(&mut self) -> bool {
313
self.backend.x64_flags.use_ssse3()
314
}
315
316
#[inline]
317
fn use_sse41(&mut self) -> bool {
318
self.backend.x64_flags.use_sse41()
319
}
320
321
#[inline]
322
fn use_sse42(&mut self) -> bool {
323
self.backend.x64_flags.use_sse42()
324
}
325
326
#[inline]
327
fn use_cmpxchg16b(&mut self) -> bool {
328
self.backend.x64_flags.use_cmpxchg16b()
329
}
330
331
#[inline]
332
fn shift_mask(&mut self, ty: Type) -> u8 {
333
debug_assert!(ty.lane_bits().is_power_of_two());
334
335
(ty.lane_bits() - 1) as u8
336
}
337
338
fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u8 {
339
(val.bits() as u8) & self.shift_mask(ty)
340
}
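// Worked example (illustrative): for `ty == I32` the mask is 31, so an immediate
// shift amount of 35 is lowered as `35 & 31 == 3`, matching x64's native masking
// of 32-bit shift counts.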
341
342
#[inline]
343
fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
344
let imm = self.i64_from_iconst(val)?;
345
Some(GprMemImm::unwrap_new(RegMemImm::Imm {
346
simm32: i32::try_from(imm).ok()?.cast_unsigned(),
347
}))
348
}
349
350
fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
351
if let Some(inst) = self.is_sinkable_inst(val) {
352
if let Some((addr_input, offset)) =
353
is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
354
{
355
return Some(SinkableLoad {
356
inst,
357
addr_input,
358
offset,
359
});
360
}
361
}
362
None
363
}
364
365
fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
366
if let Some(inst) = self.is_sinkable_inst(val) {
367
if let Some((addr_input, offset)) =
368
is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
369
{
370
return Some(SinkableLoad {
371
inst,
372
addr_input,
373
offset,
374
});
375
}
376
}
377
None
378
}
379
380
fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
381
self.lower_ctx.sink_inst(load.inst);
382
let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
383
SyntheticAmode::Real(addr)
384
}
385
386
#[inline]
387
fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
388
ExtMode::new(from_bits, to_bits).unwrap()
389
}
390
391
fn emit(&mut self, inst: &MInst) -> Unit {
392
self.lower_ctx.emit(inst.clone());
393
}
394
395
#[inline]
396
fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
397
// Insert 32 bits from the replacement (at source index 00, bits 7:6) into the vector (lane
398
// shifted into bits 5:4).
399
0b00_00_00_00 | lane << 4
400
}
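// Worked example (illustrative): `lane == 2` yields `0b0010_0000`, i.e. source
// element 0 (bits 7:6 == 00) written into destination lane 2 (bits 5:4 == 10)
// with no lanes zeroed (bits 3:0 == 0000).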
401
402
#[inline]
403
fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
404
RegMem::mem(addr.clone())
405
}
406
407
#[inline]
408
fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
409
amode.clone().into()
410
}
411
412
#[inline]
413
fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
414
SyntheticAmode::ConstantOffset(c)
415
}
416
417
#[inline]
418
fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
419
r.to_writable_reg()
420
}
421
422
#[inline]
423
fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
424
r.to_writable_reg()
425
}
426
427
fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
428
// When the shift amount is known, we can statically (i.e. at compile
429
// time) determine the mask to use and only emit that.
430
debug_assert!(amt < 8);
431
let mask_offset = amt as usize * 16;
432
let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
433
&I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
434
));
435
SyntheticAmode::ConstantOffset(mask_constant)
436
}
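// Worked example (illustrative): `amt == 3` selects bytes 48..64 of
// `I8X16_ISHL_MASKS`, the row of `0xf8` values, which clears the three low bits
// that a 16x8 left shift would otherwise pull in from the neighboring byte.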
437
438
fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
439
let mask_table = self
440
.lower_ctx
441
.use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
442
SyntheticAmode::ConstantOffset(mask_table)
443
}
444
445
fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
446
// When the shift amount is known, we can statically (i.e. at compile
447
// time) determine the mask to use and only emit that.
448
debug_assert!(amt < 8);
449
let mask_offset = amt as usize * 16;
450
let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
451
&I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
452
));
453
SyntheticAmode::ConstantOffset(mask_constant)
454
}
455
456
fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
457
let mask_table = self
458
.lower_ctx
459
.use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
460
SyntheticAmode::ConstantOffset(mask_table)
461
}
462
463
#[inline]
464
fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
465
Writable::from_reg(Xmm::unwrap_new(r.to_reg()))
466
}
467
468
#[inline]
469
fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
470
r.to_reg()
471
}
472
473
#[inline]
474
fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
475
r.to_reg()
476
}
477
478
#[inline]
479
fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
480
r.into()
481
}
482
483
#[inline]
484
fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
485
r.into()
486
}
487
488
#[inline]
489
fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
490
r.into()
491
}
492
493
#[inline]
494
fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
495
XmmMemImm::unwrap_new(r.clone().to_reg_mem().into())
496
}
497
498
#[inline]
499
fn temp_writable_gpr(&mut self) -> WritableGpr {
500
self.lower_ctx.temp_writable_gpr()
501
}
502
503
#[inline]
504
fn temp_writable_xmm(&mut self) -> WritableXmm {
505
self.lower_ctx.temp_writable_xmm()
506
}
507
508
#[inline]
509
fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
510
RegMemImm::Reg { reg }
511
}
512
513
#[inline]
514
fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
515
XmmMem::unwrap_new(rm.clone())
516
}
517
518
#[inline]
519
fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
520
GprMemImm::unwrap_new(rmi.clone())
521
}
522
523
#[inline]
524
fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
525
XmmMemImm::unwrap_new(rmi.clone())
526
}
527
528
#[inline]
529
fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
530
r.into()
531
}
532
533
#[inline]
534
fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
535
xm.clone().into()
536
}
537
538
#[inline]
539
fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
540
gm.clone().into()
541
}
542
543
#[inline]
544
fn xmm_new(&mut self, r: Reg) -> Xmm {
545
Xmm::unwrap_new(r)
546
}
547
548
#[inline]
549
fn gpr_new(&mut self, r: Reg) -> Gpr {
550
Gpr::unwrap_new(r)
551
}
552
553
#[inline]
554
fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
555
GprMem::unwrap_new(rm.clone())
556
}
557
558
#[inline]
559
fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
560
GprMem::unwrap_new(RegMem::reg(r))
561
}
562
563
#[inline]
564
fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
565
GprMem::from(gpr)
566
}
567
568
#[inline]
569
fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
570
GprMemImm::from(gpr)
571
}
572
573
#[inline]
574
fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
575
if is_int_or_ref_ty(ty) || ty == I128 {
576
Some(RegisterClass::Gpr {
577
single_register: ty != I128,
578
})
579
} else if ty.is_float() || (ty.is_vector() && ty.bits() <= 128) {
580
Some(RegisterClass::Xmm)
581
} else {
582
None
583
}
584
}
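// Illustrative examples: `I64` is a single GPR, `I128` is a GPR pair
// (`single_register: false`), `F64` and `F32X4` use the XMM class, and anything
// wider than 128 bits falls through to `None`.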
585
586
#[inline]
587
fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
588
match ty {
589
types::I8 | types::I16 | types::I32 | types::I64 => Some(()),
590
_ => None,
591
}
592
}
593
594
#[inline]
595
fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
596
CC::from_intcc(*intcc)
597
}
598
599
#[inline]
600
fn cc_invert(&mut self, cc: &CC) -> CC {
601
cc.invert()
602
}
603
604
#[inline]
605
fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
606
match cc {
607
CC::Z => Some(*cc),
608
CC::NZ => Some(*cc),
609
_ => None,
610
}
611
}
612
613
#[inline]
614
fn sum_extend_fits_in_32_bits(
615
&mut self,
616
extend_from_ty: Type,
617
constant_value: Imm64,
618
offset: Offset32,
619
) -> Option<u32> {
620
let offset: i64 = offset.into();
621
let constant_value: u64 = constant_value.bits() as u64;
622
// If necessary, zero extend `constant_value` up to 64 bits.
623
let shift = 64 - extend_from_ty.bits();
624
let zero_extended_constant_value = (constant_value << shift) >> shift;
625
// Sum up the two operands.
626
let sum = offset.wrapping_add(zero_extended_constant_value as i64);
627
// Check that the sum will fit in 32-bits.
628
if sum == ((sum << 32) >> 32) {
629
Some(sum as u32)
630
} else {
631
None
632
}
633
}
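// Worked example (illustrative): extending the I8 constant 0xf0 gives 240 after
// zero-extension; with `offset == -16` the sum is 224, which survives the
// sign-extension round-trip, so `Some(224)` is returned.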
634
635
#[inline]
636
fn amode_offset(&mut self, addr: &Amode, offset: i32) -> Amode {
637
addr.offset(offset)
638
}
639
640
#[inline]
641
fn zero_offset(&mut self) -> Offset32 {
642
Offset32::new(0)
643
}
644
645
#[inline]
646
fn preg_rbp(&mut self) -> PReg {
647
regs::rbp().to_real_reg().unwrap().into()
648
}
649
650
#[inline]
651
fn preg_rsp(&mut self) -> PReg {
652
regs::rsp().to_real_reg().unwrap().into()
653
}
654
655
#[inline]
656
fn preg_pinned(&mut self) -> PReg {
657
regs::pinned_reg().to_real_reg().unwrap().into()
658
}
659
660
fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
661
let outputs = emit_vm_call(
662
self.lower_ctx,
663
&self.backend.flags,
664
&self.backend.triple,
665
*libcall,
666
&[ValueRegs::one(a)],
667
)
668
.expect("Failed to emit LibCall");
669
670
debug_assert_eq!(outputs.len(), 1);
671
672
outputs[0].only_reg().unwrap()
673
}
674
675
fn libcall_2(&mut self, libcall: &LibCall, a: Reg, b: Reg) -> Reg {
676
let outputs = emit_vm_call(
677
self.lower_ctx,
678
&self.backend.flags,
679
&self.backend.triple,
680
*libcall,
681
&[ValueRegs::one(a), ValueRegs::one(b)],
682
)
683
.expect("Failed to emit LibCall");
684
685
debug_assert_eq!(outputs.len(), 1);
686
687
outputs[0].only_reg().unwrap()
688
}
689
690
fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
691
let outputs = emit_vm_call(
692
self.lower_ctx,
693
&self.backend.flags,
694
&self.backend.triple,
695
*libcall,
696
&[ValueRegs::one(a), ValueRegs::one(b), ValueRegs::one(c)],
697
)
698
.expect("Failed to emit LibCall");
699
700
debug_assert_eq!(outputs.len(), 1);
701
702
outputs[0].only_reg().unwrap()
703
}
704
705
#[inline]
706
fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
707
let const_data = self.lower_ctx.get_constant_data(constant);
708
if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
709
return Some(());
710
}
711
None
712
}
713
714
#[inline]
715
fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
716
let mask = mask
717
.iter()
718
.map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
719
.map(|b| if b > 15 { 0b10000000 } else { b })
720
.collect();
721
self.lower_ctx
722
.use_constant(VCodeConstantData::Generated(mask))
723
}
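// Illustrative example: a selector byte of 20 becomes 4 (20 - 16); any selector
// that still ends up above 15 is replaced with 0x80, which `pshufb` treats as
// "zero this output byte".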
724
725
#[inline]
726
fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
727
let mask = mask
728
.iter()
729
.map(|&b| if b > 15 { 0b10000000 } else { b })
730
.collect();
731
self.lower_ctx
732
.use_constant(VCodeConstantData::Generated(mask))
733
}
734
735
#[inline]
736
fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
737
let mask = mask
738
.iter()
739
.map(|&b| b.wrapping_sub(16))
740
.map(|b| if b > 15 { 0b10000000 } else { b })
741
.collect();
742
self.lower_ctx
743
.use_constant(VCodeConstantData::Generated(mask))
744
}
745
746
#[inline]
747
fn perm_from_mask_with_zeros(
748
&mut self,
749
mask: &VecMask,
750
) -> Option<(VCodeConstant, VCodeConstant)> {
751
if !mask.iter().any(|&b| b > 31) {
752
return None;
753
}
754
755
let zeros = mask
756
.iter()
757
.map(|&b| if b > 31 { 0x00 } else { 0xff })
758
.collect();
759
760
Some((
761
self.perm_from_mask(mask),
762
self.lower_ctx
763
.use_constant(VCodeConstantData::Generated(zeros)),
764
))
765
}
766
767
#[inline]
768
fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
769
let mask = mask.iter().cloned().collect();
770
self.lower_ctx
771
.use_constant(VCodeConstantData::Generated(mask))
772
}
773
774
fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
775
match XmmMemAligned::new(arg.clone().into()) {
776
Some(aligned) => aligned,
777
None => match arg.clone().into() {
778
RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
779
_ => unreachable!(),
780
},
781
}
782
}
783
784
fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
785
match XmmMemAlignedImm::new(arg.clone().into()) {
786
Some(aligned) => aligned,
787
None => match arg.clone().into() {
788
RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
789
_ => unreachable!(),
790
},
791
}
792
}
793
794
fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
795
let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
796
if a < 4 && b < 4 && c < 4 && d < 4 {
797
Some(a | (b << 2) | (c << 4) | (d << 6))
798
} else {
799
None
800
}
801
}
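// Worked example (illustrative): the 32-bit shuffle `[1, 0, 3, 2]` packs to
// `0b10_11_00_01 == 0xb1`, the `pshufd` immediate that swaps adjacent lanes.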
802
803
fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
804
let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
805
// When selecting from the right-hand-side, subtract these all by 4
806
// which will bail out if anything is less than 4. Afterwards the check
807
// is the same as `pshufd_lhs_imm` above.
808
let a = a.checked_sub(4)?;
809
let b = b.checked_sub(4)?;
810
let c = c.checked_sub(4)?;
811
let d = d.checked_sub(4)?;
812
if a < 4 && b < 4 && c < 4 && d < 4 {
813
Some(a | (b << 2) | (c << 4) | (d << 6))
814
} else {
815
None
816
}
817
}
818
819
fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
820
// The `shufps` instruction selects the first two elements from the
821
// first vector and the second two elements from the second vector, so
822
// offset the third/fourth selectors by 4 and then make sure every
823
// selector is in range (less than 4) so the result packs into the 8-bit immediate.
824
let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
825
let c = c.checked_sub(4)?;
826
let d = d.checked_sub(4)?;
827
if a < 4 && b < 4 && c < 4 && d < 4 {
828
Some(a | (b << 2) | (c << 4) | (d << 6))
829
} else {
830
None
831
}
832
}
833
834
fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
835
// This is almost the same as `shufps_imm` except the elements that are
836
// subtracted are reversed. This handles the case where a `shufps`
837
// instruction can be emitted if the order of the operands is swapped.
838
let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
839
let a = a.checked_sub(4)?;
840
let b = b.checked_sub(4)?;
841
if a < 4 && b < 4 && c < 4 && d < 4 {
842
Some(a | (b << 2) | (c << 4) | (d << 6))
843
} else {
844
None
845
}
846
}
847
848
fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
849
// Similar to `shufps` except this operates over 16-bit values so four
850
// of them must be fixed and the other four must be in-range to encode
851
// in the immediate.
852
let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
853
if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
854
Some(a | (b << 2) | (c << 4) | (d << 6))
855
} else {
856
None
857
}
858
}
859
860
fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
861
let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
862
let a = a.checked_sub(8)?;
863
let b = b.checked_sub(8)?;
864
let c = c.checked_sub(8)?;
865
let d = d.checked_sub(8)?;
866
let e = e.checked_sub(8)?;
867
let f = f.checked_sub(8)?;
868
let g = g.checked_sub(8)?;
869
let h = h.checked_sub(8)?;
870
if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
871
Some(a | (b << 2) | (c << 4) | (d << 6))
872
} else {
873
None
874
}
875
}
876
877
fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
878
// Similar to `pshuflw` except that the first four operands must be
879
// fixed and the second four are offset by an extra 4 and tested to
880
// make sure they're all in the range [4, 8).
881
let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
882
let e = e.checked_sub(4)?;
883
let f = f.checked_sub(4)?;
884
let g = g.checked_sub(4)?;
885
let h = h.checked_sub(4)?;
886
if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
887
Some(e | (f << 2) | (g << 4) | (h << 6))
888
} else {
889
None
890
}
891
}
892
893
fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
894
// Note that everything here is offset by at least 8 and the upper
895
// four elements are offset by 12 to test they're in the range of [12, 16).
896
let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
897
let a = a.checked_sub(8)?;
898
let b = b.checked_sub(8)?;
899
let c = c.checked_sub(8)?;
900
let d = d.checked_sub(8)?;
901
let e = e.checked_sub(12)?;
902
let f = f.checked_sub(12)?;
903
let g = g.checked_sub(12)?;
904
let h = h.checked_sub(12)?;
905
if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
906
Some(e | (f << 2) | (g << 4) | (h << 6))
907
} else {
908
None
909
}
910
}
911
912
fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
913
let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();
914
915
if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
916
Some(bytes[0])
917
} else {
918
None
919
}
920
}
921
922
fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
923
// First make sure that the shuffle immediate is selecting 16-bit lanes.
924
let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
925
926
// Next build up an 8-bit mask from each of the bits of the selected
927
// lanes above. This instruction can only be used when each lane
928
// selector chooses from the corresponding lane in either of the two
929
// operands, meaning the Nth lane selection must satisfy `lane % 8 ==
930
// N`.
931
//
932
// This helper closure is used to calculate the value of the
933
// corresponding bit.
934
let bit = |x: u8, c: u8| {
935
if x % 8 == c {
936
if x < 8 { Some(0) } else { Some(1 << c) }
937
} else {
938
None
939
}
940
};
941
Some(
942
bit(a, 0)?
943
| bit(b, 1)?
944
| bit(c, 2)?
945
| bit(d, 3)?
946
| bit(e, 4)?
947
| bit(f, 5)?
948
| bit(g, 6)?
949
| bit(h, 7)?,
950
)
951
}
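// Worked example (illustrative): the 16-bit shuffle `[0, 9, 2, 11, 4, 13, 6, 15]`
// keeps even lanes from the first operand and takes odd lanes from the second,
// so the bits above combine to `0b1010_1010 == 0xaa`.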
952
953
fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
954
XmmMemImm::unwrap_new(RegMemImm::imm(imm))
955
}
956
957
fn insert_i8x16_lane_hole(&mut self, hole_idx: u8) -> VCodeConstant {
958
let mask = -1i128 as u128;
959
self.emit_u128_le_const(mask ^ (0xff << (hole_idx * 8)))
960
}
961
962
fn writable_invalid_gpr(&mut self) -> WritableGpr {
963
let reg = Gpr::new(self.invalid_reg()).unwrap();
964
WritableGpr::from_reg(reg)
965
}
966
967
fn box_synthetic_amode(&mut self, amode: &SyntheticAmode) -> BoxSyntheticAmode {
968
Box::new(amode.clone())
969
}
970
971
////////////////////////////////////////////////////////////////////////////
972
///// External assembler methods.
973
////////////////////////////////////////////////////////////////////////////
974
975
fn is_imm8(&mut self, src: &GprMemImm) -> Option<u8> {
976
match src.clone().to_reg_mem_imm() {
977
RegMemImm::Imm { simm32 } => {
978
Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
979
}
980
_ => None,
981
}
982
}
983
984
fn is_imm8_xmm(&mut self, src: &XmmMemImm) -> Option<u8> {
985
match src.clone().to_reg_mem_imm() {
986
RegMemImm::Imm { simm32 } => {
987
Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
988
}
989
_ => None,
990
}
991
}
992
993
fn is_simm8(&mut self, src: &GprMemImm) -> Option<i8> {
994
match src.clone().to_reg_mem_imm() {
995
RegMemImm::Imm { simm32 } => Some(i8::try_from(simm32.cast_signed()).ok()?),
996
_ => None,
997
}
998
}
999
1000
fn is_imm16(&mut self, src: &GprMemImm) -> Option<u16> {
1001
match src.clone().to_reg_mem_imm() {
1002
RegMemImm::Imm { simm32 } => {
1003
Some(i16::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
1004
}
1005
_ => None,
1006
}
1007
}
1008
1009
fn is_simm16(&mut self, src: &GprMemImm) -> Option<i16> {
1010
match src.clone().to_reg_mem_imm() {
1011
RegMemImm::Imm { simm32 } => Some(i16::try_from(simm32.cast_signed()).ok()?),
1012
_ => None,
1013
}
1014
}
1015
1016
fn is_imm32(&mut self, src: &GprMemImm) -> Option<u32> {
1017
match src.clone().to_reg_mem_imm() {
1018
RegMemImm::Imm { simm32 } => Some(simm32),
1019
_ => None,
1020
}
1021
}
1022
1023
fn is_simm32(&mut self, src: &GprMemImm) -> Option<i32> {
1024
match src.clone().to_reg_mem_imm() {
1025
RegMemImm::Imm { simm32 } => Some(simm32 as i32),
1026
_ => None,
1027
}
1028
}
1029
1030
fn is_gpr(&mut self, src: &GprMemImm) -> Option<Gpr> {
1031
match src.clone().to_reg_mem_imm() {
1032
RegMemImm::Reg { reg } => Gpr::new(reg),
1033
_ => None,
1034
}
1035
}
1036
1037
fn is_xmm(&mut self, src: &XmmMem) -> Option<Xmm> {
1038
match src.clone().to_reg_mem() {
1039
RegMem::Reg { reg } => Xmm::new(reg),
1040
_ => None,
1041
}
1042
}
1043
1044
fn is_gpr_mem(&mut self, src: &GprMemImm) -> Option<GprMem> {
1045
match src.clone().to_reg_mem_imm() {
1046
RegMemImm::Reg { reg } => GprMem::new(RegMem::Reg { reg }),
1047
RegMemImm::Mem { addr } => GprMem::new(RegMem::Mem { addr }),
1048
_ => None,
1049
}
1050
}
1051
1052
fn is_xmm_mem(&mut self, src: &XmmMemImm) -> Option<XmmMem> {
1053
match src.clone().to_reg_mem_imm() {
1054
RegMemImm::Reg { reg } => XmmMem::new(RegMem::Reg { reg }),
1055
RegMemImm::Mem { addr } => XmmMem::new(RegMem::Mem { addr }),
1056
_ => None,
1057
}
1058
}
1059
1060
fn is_mem(&mut self, src: &XmmMem) -> Option<SyntheticAmode> {
1061
match src.clone().to_reg_mem() {
1062
RegMem::Reg { .. } => None,
1063
RegMem::Mem { addr } => Some(addr),
1064
}
1065
}
1066
1067
// Custom constructors for `mulx` that only compute the high half of the
1068
// result, so the same output operand is used for both destination
1069
// registers. This is in contrast to the assembler-generated version of the
1070
// instruction, which allocates two distinct temporary output registers and
1071
// computes both the high and low halves of the result.
1072
1073
fn x64_mulxl_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
1074
let ret = self.temp_writable_gpr();
1075
let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
1076
let inst = asm::inst::mulxl_rvm::new(ret, ret, src1, src2);
1077
self.emit(&MInst::External { inst: inst.into() });
1078
ret.to_reg()
1079
}
1080
1081
fn x64_mulxq_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
1082
let ret = self.temp_writable_gpr();
1083
let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
1084
let inst = asm::inst::mulxq_rvm::new(ret, ret, src1, src2);
1085
self.emit(&MInst::External { inst: inst.into() });
1086
ret.to_reg()
1087
}
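// Note (illustrative): naming the same register as both `mulx` destinations is
// architecturally defined to leave the high half of the product in that register,
// which is exactly what these `_hi` helpers rely on.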
1088
1089
fn bt_imm(&mut self, val: u64) -> Option<u8> {
1090
if val.count_ones() == 1 {
1091
Some(u8::try_from(val.trailing_zeros()).unwrap())
1092
} else {
1093
None
1094
}
1095
}
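// Illustrative example: `bt_imm(0x8000_0000)` is `Some(31)` (the index of the
// single set bit), while `bt_imm(0x3)` has two bits set and returns `None`.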
1096
}
1097
1098
impl IsleContext<'_, '_, MInst, X64Backend> {
1099
fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
1100
let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
1101
self.lower_ctx.emit(MInst::External {
1102
inst: asm::inst::movdqu_a::new(
1103
Writable::from_reg(Xmm::unwrap_new(tmp.to_reg())),
1104
asm::XmmMem::Mem(addr.into()),
1105
)
1106
.into(),
1107
});
1108
Xmm::unwrap_new(tmp.to_reg())
1109
}
1110
1111
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1112
fn convert_gpr_to_assembler_read_write_gpr(&mut self, read: Gpr) -> asm::Gpr<PairedGpr> {
1113
let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
1114
let write = WritableGpr::from_writable_reg(write).unwrap();
1115
asm::Gpr::new(PairedGpr { read, write })
1116
}
1117
1118
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1119
fn convert_gpr_to_assembler_fixed_read_write_gpr<const E: u8>(
1120
&mut self,
1121
read: Gpr,
1122
) -> asm::Fixed<PairedGpr, E> {
1123
let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
1124
let write = WritableGpr::from_writable_reg(write).unwrap();
1125
asm::Fixed(PairedGpr { read, write })
1126
}
1127
1128
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1129
fn convert_xmm_to_assembler_read_write_xmm(&mut self, read: Xmm) -> asm::Xmm<PairedXmm> {
1130
let write = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
1131
let write = WritableXmm::from_writable_reg(write).unwrap();
1132
asm::Xmm::new(PairedXmm { read, write })
1133
}
1134
1135
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1136
fn convert_gpr_mem_to_assembler_read_gpr_mem(&self, read: &GprMem) -> asm::GprMem<Gpr, Gpr> {
1137
match read.clone().into() {
1138
RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
1139
RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
1140
}
1141
}
1142
1143
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1144
fn convert_xmm_mem_to_assembler_read_xmm_mem_aligned(
1145
&self,
1146
read: &XmmMemAligned,
1147
) -> asm::XmmMem<Xmm, Gpr> {
1148
match read.clone().into() {
1149
RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
1150
RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
1151
}
1152
}
1153
1154
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1155
fn convert_xmm_mem_to_assembler_read_xmm_mem(&self, read: &XmmMem) -> asm::XmmMem<Xmm, Gpr> {
1156
match read.clone().into() {
1157
RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
1158
RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
1159
}
1160
}
1161
1162
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1163
fn convert_xmm_mem_to_assembler_write_xmm_mem(
1164
&self,
1165
write: &XmmMem,
1166
) -> asm::XmmMem<Writable<Xmm>, Gpr> {
1167
match write.clone().into() {
1168
RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
1169
RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
1170
}
1171
}
1172
1173
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1174
fn convert_xmm_mem_to_assembler_write_xmm_mem_aligned(
1175
&self,
1176
write: &XmmMemAligned,
1177
) -> asm::XmmMem<Writable<Xmm>, Gpr> {
1178
match write.clone().into() {
1179
RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
1180
RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
1181
}
1182
}
1183
1184
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1185
fn convert_gpr_mem_to_assembler_read_write_gpr_mem(
1186
&mut self,
1187
read: &GprMem,
1188
) -> asm::GprMem<PairedGpr, Gpr> {
1189
match read.clone().into() {
1190
RegMem::Reg { reg } => asm::GprMem::Gpr(
1191
*self
1192
.convert_gpr_to_assembler_read_write_gpr(Gpr::new(reg).unwrap())
1193
.as_ref(),
1194
),
1195
RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
1196
}
1197
}
1198
1199
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1200
fn convert_gpr_mem_to_assembler_write_gpr_mem(
1201
&mut self,
1202
read: &GprMem,
1203
) -> asm::GprMem<WritableGpr, Gpr> {
1204
match read.clone().into() {
1205
RegMem::Reg { reg } => asm::GprMem::Gpr(WritableGpr::from_reg(Gpr::new(reg).unwrap())),
1206
RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
1207
}
1208
}
1209
1210
/// Helper used by code generated by the `cranelift-assembler-x64` crate.
1211
fn convert_amode_to_assembler_amode(&mut self, amode: &SyntheticAmode) -> asm::Amode<Gpr> {
1212
amode.clone().into()
1213
}
1214
}
1215
1216
// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
1217
// need to fix up the bits that migrate from one half of the lane to the
1218
// other. Each 16-byte mask is indexed by the shift amount: e.g. if we shift
1219
// right by 0 (no movement), we want to retain all the bits so we mask with
1220
// `0xff`; if we shift right by 1, we want to retain all bits except the MSB so
1221
// we mask with `0x7f`; etc.
1222
1223
#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
1224
const I8X16_ISHL_MASKS: [u8; 128] = [
1225
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1226
0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
1227
0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
1228
0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
1229
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
1230
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
1231
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
1232
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
1233
];
1234
1235
#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
1236
const I8X16_USHR_MASKS: [u8; 128] = [
1237
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1238
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
1239
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
1240
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
1241
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
1242
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
1243
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
1244
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
1245
];
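// A small unit sanity check of the tables above; an illustrative sketch added
// here for exposition and not part of the upstream file. It relies only on the
// constants defined in this module.
#[cfg(test)]
mod shift_mask_table_tests {
    use super::{I8X16_ISHL_MASKS, I8X16_USHR_MASKS};

    #[test]
    fn mask_rows_match_per_byte_shift_semantics() {
        for amt in 0..8u32 {
            let row = amt as usize * 16;
            // The mask row for shift amount `amt` must equal an all-ones byte
            // shifted by `amt`, i.e. the bits a per-byte (i8x16) shift would keep.
            assert_eq!(I8X16_ISHL_MASKS[row], 0xffu8 << amt);
            assert_eq!(I8X16_USHR_MASKS[row], 0xffu8 >> amt);
            // Every byte within a row is identical, so the mask applies uniformly
            // to all sixteen lanes.
            assert!(I8X16_ISHL_MASKS[row..row + 16].iter().all(|&b| b == I8X16_ISHL_MASKS[row]));
            assert!(I8X16_USHR_MASKS[row..row + 16].iter().all(|&b| b == I8X16_USHR_MASKS[row]));
        }
    }
}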