//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
21
class X86Subtarget;
22
class X86TargetMachine;
23
24
namespace X86ISD {
25
// X86 Specific DAG Nodes
26
enum NodeType : unsigned {
27
// Start the numbering where the builtin ops leave off.
28
FIRST_NUMBER = ISD::BUILTIN_OP_END,
29
30
/// Bit scan forward.
31
BSF,
32
/// Bit scan reverse.
33
BSR,
34
35
/// X86 funnel/double shift i16 instructions. These correspond to
36
/// X86::SHLDW and X86::SHRDW instructions which have different amt
37
/// modulo rules from generic funnel shifts.
38
/// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
39
FSHL,
40
FSHR,
41
42
/// Bitwise logical AND of floating point values. This corresponds
43
/// to X86::ANDPS or X86::ANDPD.
44
FAND,
45
46
/// Bitwise logical OR of floating point values. This corresponds
47
/// to X86::ORPS or X86::ORPD.
48
FOR,
49
50
/// Bitwise logical XOR of floating point values. This corresponds
51
/// to X86::XORPS or X86::XORPD.
52
FXOR,
53
54
/// Bitwise logical ANDNOT of floating point values. This
55
/// corresponds to X86::ANDNPS or X86::ANDNPD.
56
FANDN,
57
58
/// These operations represent an abstract X86 call
59
/// instruction, which includes a bunch of information. In particular the
60
/// operands of these nodes are:
61
///
62
/// #0 - The incoming token chain
63
/// #1 - The callee
64
/// #2 - The number of arg bytes the caller pushes on the stack.
65
/// #3 - The number of arg bytes the callee pops off the stack.
66
/// #4 - The value to pass in AL/AX/EAX (optional)
67
/// #5 - The value to pass in DL/DX/EDX (optional)
68
///
69
/// The result values of these nodes are:
70
///
71
/// #0 - The outgoing token chain
72
/// #1 - The first register result value (optional)
73
/// #2 - The second register result value (optional)
74
///
75
CALL,
76
77
/// Same as call except it adds the NoTrack prefix.
78
NT_CALL,
79
80
// Pseudo for an ObjC call that gets emitted together with a special
81
// marker instruction.
82
CALL_RVMARKER,
83
84
/// X86 compare and logical compare instructions.
85
CMP,
86
FCMP,
87
COMI,
88
UCOMI,
89
90
/// X86 bit-test instructions.
91
BT,
92
93
/// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94
/// operand, usually produced by a CMP instruction.
95
SETCC,
96
97
/// X86 Select
98
SELECTS,
99
100
// Same as SETCC except it's materialized with a sbb and the value is all
101
// ones or all zeros.
102
SETCC_CARRY, // R = carry_bit ? ~0 : 0
103
104
/// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105
/// Operands are two FP values to compare; result is a mask of
106
/// 0s or 1s. Generally DTRT for C/C++ with NaNs.
107
FSETCC,
108
109
/// X86 FP SETCC, similar to above, but with output as an i1 mask and
110
/// a version with SAE.
111
FSETCCM,
112
FSETCCM_SAE,
113
114
/// X86 conditional moves. Operand 0 and operand 1 are the two values
115
/// to select from. Operand 2 is the condition code, and operand 3 is the
116
/// flag operand produced by a CMP or TEST instruction.
117
CMOV,
118
119
/// X86 conditional branches. Operand 0 is the chain operand, operand 1
120
/// is the block to branch if condition is true, operand 2 is the
121
/// condition code, and operand 3 is the flag operand produced by a CMP
122
/// or TEST instruction.
123
BRCOND,
124
125
/// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
126
/// operand 1 is the target address.
127
NT_BRIND,
128
129
/// Return with a glue operand. Operand 0 is the chain operand, operand
130
/// 1 is the number of bytes of stack to pop.
131
RET_GLUE,
132
133
/// Return from interrupt. Operand 0 is the number of bytes to pop.
134
IRET,
135
136
/// Repeat fill, corresponds to X86::REP_STOSx.
137
REP_STOS,
138
139
/// Repeat move, corresponds to X86::REP_MOVSx.
140
REP_MOVS,
141
142
/// On Darwin, this node represents the result of the popl
143
/// at function entry, used for PIC code.
144
GlobalBaseReg,
145
146
/// A wrapper node for TargetConstantPool, TargetJumpTable,
147
/// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
148
/// MCSymbol and TargetBlockAddress.
149
Wrapper,
150
151
/// Special wrapper used under X86-64 PIC mode for RIP
152
/// relative displacements.
153
WrapperRIP,
154
155
/// Copies a 64-bit value from an MMX vector to the low word
156
/// of an XMM vector, with the high word zero filled.
157
MOVQ2DQ,
158
159
/// Copies a 64-bit value from the low word of an XMM vector
160
/// to an MMX vector.
161
MOVDQ2Q,
162
163
/// Copies a 32-bit value from the low word of a MMX
164
/// vector to a GPR.
165
MMX_MOVD2W,
166
167
/// Copies a GPR into the low 32-bit word of a MMX vector
168
/// and zero out the high word.
169
MMX_MOVW2D,
170
171
/// Extract an 8-bit value from a vector and zero extend it to
172
/// i32, corresponds to X86::PEXTRB.
173
PEXTRB,
174
175
/// Extract a 16-bit value from a vector and zero extend it to
176
/// i32, corresponds to X86::PEXTRW.
177
PEXTRW,
178
179
/// Insert any element of a 4 x float vector into any element
180
/// of a destination 4 x float vector.
181
INSERTPS,
182
183
/// Insert the lower 8-bits of a 32-bit value to a vector,
184
/// corresponds to X86::PINSRB.
185
PINSRB,
186
187
/// Insert the lower 16-bits of a 32-bit value to a vector,
188
/// corresponds to X86::PINSRW.
189
PINSRW,
190
191
/// Shuffle 16 8-bit values within a vector.
192
PSHUFB,
193
194
/// Compute Sum of Absolute Differences.
195
PSADBW,
196
/// Compute Double Block Packed Sum-Absolute-Differences
197
DBPSADBW,
198
199
/// Bitwise Logical AND NOT of Packed FP values.
200
ANDNP,
201
202
/// Blend where the selector is an immediate.
203
BLENDI,
204
205
/// Dynamic (non-constant condition) vector blend where only the sign bits
206
/// of the condition elements are used. This is used to enforce that the
207
/// condition mask is not valid for generic VSELECT optimizations. This
208
/// is also used to implement the intrinsics.
209
/// Operands are in VSELECT order: MASK, TRUE, FALSE
210
BLENDV,
211
212
/// Combined add and sub on an FP vector.
213
ADDSUB,
214
215
// FP vector ops with rounding mode.
216
FADD_RND,
217
FADDS,
218
FADDS_RND,
219
FSUB_RND,
220
FSUBS,
221
FSUBS_RND,
222
FMUL_RND,
223
FMULS,
224
FMULS_RND,
225
FDIV_RND,
226
FDIVS,
227
FDIVS_RND,
228
FMAX_SAE,
229
FMAXS_SAE,
230
FMIN_SAE,
231
FMINS_SAE,
232
FSQRT_RND,
233
FSQRTS,
234
FSQRTS_RND,
235
236
// FP vector get exponent.
237
FGETEXP,
238
FGETEXP_SAE,
239
FGETEXPS,
240
FGETEXPS_SAE,
241
// Extract Normalized Mantissas.
242
VGETMANT,
243
VGETMANT_SAE,
244
VGETMANTS,
245
VGETMANTS_SAE,
246
// FP Scale.
247
SCALEF,
248
SCALEF_RND,
249
SCALEFS,
250
SCALEFS_RND,
251
252
/// Integer horizontal add/sub.
253
HADD,
254
HSUB,
255
256
/// Floating point horizontal add/sub.
257
FHADD,
258
FHSUB,
259
260
// Detect Conflicts Within a Vector
261
CONFLICT,
262
263
/// Floating point max and min.
264
FMAX,
265
FMIN,
266
267
/// Commutative FMIN and FMAX.
268
FMAXC,
269
FMINC,
270
271
/// Scalar intrinsic floating point max and min.
272
FMAXS,
273
FMINS,
274
275
/// Floating point reciprocal-sqrt and reciprocal approximation.
276
/// Note that these typically require refinement
277
/// in order to obtain suitable precision.
278
FRSQRT,
279
FRCP,
280
281
// AVX-512 reciprocal approximations with a little more precision.
282
RSQRT14,
283
RSQRT14S,
284
RCP14,
285
RCP14S,
286
287
// Thread Local Storage.
288
TLSADDR,
289
290
// Thread Local Storage. A call to get the start address
291
// of the TLS block for the current module.
292
TLSBASEADDR,
293
294
// Thread Local Storage. When calling to an OS provided
295
// thunk at the address from an earlier relocation.
296
TLSCALL,
297
298
// Thread Local Storage. A descriptor containing pointer to
299
// code and to argument to get the TLS offset for the symbol.
300
TLSDESC,
301
302
// Exception Handling helpers.
303
EH_RETURN,
304
305
// SjLj exception handling setjmp.
306
EH_SJLJ_SETJMP,
307
308
// SjLj exception handling longjmp.
309
EH_SJLJ_LONGJMP,
310
311
// SjLj exception handling dispatch.
312
EH_SJLJ_SETUP_DISPATCH,
313
314
/// Tail call return. See X86TargetLowering::LowerCall for
315
/// the list of operands.
316
TC_RETURN,
317
318
// Vector move to low scalar and zero higher vector elements.
319
VZEXT_MOVL,
320
321
// Vector integer truncate.
322
VTRUNC,
323
// Vector integer truncate with unsigned/signed saturation.
324
VTRUNCUS,
325
VTRUNCS,
326
327
// Masked version of the above. Used when less than a 128-bit result is
328
// produced since the mask only applies to the lower elements and can't
329
// be represented by a select.
330
// SRC, PASSTHRU, MASK
331
VMTRUNC,
332
VMTRUNCUS,
333
VMTRUNCS,
334
335
// Vector FP extend.
336
VFPEXT,
337
VFPEXT_SAE,
338
VFPEXTS,
339
VFPEXTS_SAE,
340
341
// Vector FP round.
342
VFPROUND,
343
VFPROUND_RND,
344
VFPROUNDS,
345
VFPROUNDS_RND,
346
347
// Masked version of above. Used for v2f64->v4f32.
348
// SRC, PASSTHRU, MASK
349
VMFPROUND,
350
351
// 128-bit vector logical left / right shift
352
VSHLDQ,
353
VSRLDQ,
354
355
// Vector shift elements
356
VSHL,
357
VSRL,
358
VSRA,
359
360
// Vector variable shift
361
VSHLV,
362
VSRLV,
363
VSRAV,
364
365
// Vector shift elements by immediate
366
VSHLI,
367
VSRLI,
368
VSRAI,
369
370
// Shifts of mask registers.
371
KSHIFTL,
372
KSHIFTR,
373
374
// Bit rotate by immediate
375
VROTLI,
376
VROTRI,
377
378
// Vector packed double/float comparison.
379
CMPP,
380
381
// Vector integer comparisons.
382
PCMPEQ,
383
PCMPGT,
384
385
// v8i16 Horizontal minimum and position.
386
PHMINPOS,
387
388
MULTISHIFT,
389
390
/// Vector comparison generating mask bits for fp and
391
/// integer signed and unsigned data types.
392
CMPM,
393
// Vector mask comparison generating mask bits for FP values.
394
CMPMM,
395
// Vector mask comparison with SAE for FP values.
396
CMPMM_SAE,
397
398
// Arithmetic operations with FLAGS results.
399
ADD,
400
SUB,
401
ADC,
402
SBB,
403
SMUL,
404
UMUL,
405
OR,
406
XOR,
407
AND,
408
409
// Bit field extract.
410
BEXTR,
411
BEXTRI,
412
413
// Zero High Bits Starting with Specified Bit Position.
414
BZHI,
415
416
// Parallel extract and deposit.
417
PDEP,
418
PEXT,
419
420
// X86-specific multiply by immediate.
421
MUL_IMM,
422
423
// Vector sign bit extraction.
424
MOVMSK,
425
426
// Vector bitwise comparisons.
427
PTEST,
428
429
// Vector packed fp sign bitwise comparisons.
430
TESTP,
431
432
// OR/AND test for masks.
433
KORTEST,
434
KTEST,
435
436
// ADD for masks.
437
KADD,
438
439
// Several flavors of instructions with vector shuffle behaviors.
440
// Saturated signed/unsigned packing.
441
PACKSS,
442
PACKUS,
443
// Intra-lane alignr.
444
PALIGNR,
445
// AVX512 inter-lane alignr.
446
VALIGN,
447
PSHUFD,
448
PSHUFHW,
449
PSHUFLW,
450
SHUFP,
451
// VBMI2 Concat & Shift.
452
VSHLD,
453
VSHRD,
454
VSHLDV,
455
VSHRDV,
456
// Shuffle Packed Values at 128-bit granularity.
457
SHUF128,
458
MOVDDUP,
459
MOVSHDUP,
460
MOVSLDUP,
461
MOVLHPS,
462
MOVHLPS,
463
MOVSD,
464
MOVSS,
465
MOVSH,
466
UNPCKL,
467
UNPCKH,
468
VPERMILPV,
469
VPERMILPI,
470
VPERMI,
471
VPERM2X128,
472
473
// Variable Permute (VPERM).
474
// Res = VPERMV MaskV, V0
475
VPERMV,
476
477
// 3-op Variable Permute (VPERMT2).
478
// Res = VPERMV3 V0, MaskV, V1
479
VPERMV3,
480
481
// Bitwise ternary logic.
482
VPTERNLOG,
483
// Fix Up Special Packed Float32/64 values.
484
VFIXUPIMM,
485
VFIXUPIMM_SAE,
486
VFIXUPIMMS,
487
VFIXUPIMMS_SAE,
488
// Range Restriction Calculation For Packed Pairs of Float32/64 values.
489
VRANGE,
490
VRANGE_SAE,
491
VRANGES,
492
VRANGES_SAE,
493
// Reduce - Perform Reduction Transformation on scalar/packed FP.
494
VREDUCE,
495
VREDUCE_SAE,
496
VREDUCES,
497
VREDUCES_SAE,
498
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
499
// Also used by the legacy (V)ROUND intrinsics where we mask out the
500
// scaling part of the immediate.
501
VRNDSCALE,
502
VRNDSCALE_SAE,
503
VRNDSCALES,
504
VRNDSCALES_SAE,
505
// Tests types of packed FP values.
506
VFPCLASS,
507
// Tests types of scalar FP values.
508
VFPCLASSS,
509
510
// Broadcast (splat) scalar or element 0 of a vector. If the operand is
511
// a vector, this node may change the vector length as part of the splat.
512
VBROADCAST,
513
// Broadcast mask to vector.
514
VBROADCASTM,
515
516
/// SSE4A Extraction and Insertion.
517
EXTRQI,
518
INSERTQI,
519
520
// XOP arithmetic/logical shifts.
521
VPSHA,
522
VPSHL,
523
// XOP signed/unsigned integer comparisons.
524
VPCOM,
525
VPCOMU,
526
// XOP packed permute bytes.
527
VPPERM,
528
// XOP two source permutation.
529
VPERMIL2,
530
531
// Vector multiply packed unsigned doubleword integers.
532
PMULUDQ,
533
// Vector multiply packed signed doubleword integers.
534
PMULDQ,
535
// Vector Multiply Packed Unsigned Integers with Round and Scale.
536
MULHRS,
537
538
// Multiply and Add Packed Integers.
539
VPMADDUBSW,
540
VPMADDWD,
541
542
// AVX512IFMA multiply and add.
543
// NOTE: These are different from the instruction and perform
544
// op0 x op1 + op2.
545
VPMADD52L,
546
VPMADD52H,
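// For illustration: the VPMADD52LUQ/VPMADD52HUQ instructions accumulate into
// their destination (dst += lo/hi 52 bits of src1 * src2); these nodes instead
// take the addend explicitly as the third operand.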
547
548
// VNNI
549
VPDPBUSD,
550
VPDPBUSDS,
551
VPDPWSSD,
552
VPDPWSSDS,
553
554
// FMA nodes.
555
// We use the target independent ISD::FMA for the non-inverted case.
556
FNMADD,
557
FMSUB,
558
FNMSUB,
559
FMADDSUB,
560
FMSUBADD,
561
562
// FMA with rounding mode.
563
FMADD_RND,
564
FNMADD_RND,
565
FMSUB_RND,
566
FNMSUB_RND,
567
FMADDSUB_RND,
568
FMSUBADD_RND,
569
570
// AVX512-FP16 complex addition and multiplication.
571
VFMADDC,
572
VFMADDC_RND,
573
VFCMADDC,
574
VFCMADDC_RND,
575
576
VFMULC,
577
VFMULC_RND,
578
VFCMULC,
579
VFCMULC_RND,
580
581
VFMADDCSH,
582
VFMADDCSH_RND,
583
VFCMADDCSH,
584
VFCMADDCSH_RND,
585
586
VFMULCSH,
587
VFMULCSH_RND,
588
VFCMULCSH,
589
VFCMULCSH_RND,
590
591
VPDPBSUD,
592
VPDPBSUDS,
593
VPDPBUUD,
594
VPDPBUUDS,
595
VPDPBSSD,
596
VPDPBSSDS,
597
598
// Compress and expand.
599
COMPRESS,
600
EXPAND,
601
602
// Bits shuffle
603
VPSHUFBITQMB,
604
605
// Convert Unsigned/Integer to Floating-Point Value with rounding mode.
606
SINT_TO_FP_RND,
607
UINT_TO_FP_RND,
608
SCALAR_SINT_TO_FP,
609
SCALAR_UINT_TO_FP,
610
SCALAR_SINT_TO_FP_RND,
611
SCALAR_UINT_TO_FP_RND,
612
613
// Vector float/double to signed/unsigned integer.
614
CVTP2SI,
615
CVTP2UI,
616
CVTP2SI_RND,
617
CVTP2UI_RND,
618
// Scalar float/double to signed/unsigned integer.
619
CVTS2SI,
620
CVTS2UI,
621
CVTS2SI_RND,
622
CVTS2UI_RND,
623
624
// Vector float/double to signed/unsigned integer with truncation.
625
CVTTP2SI,
626
CVTTP2UI,
627
CVTTP2SI_SAE,
628
CVTTP2UI_SAE,
629
// Scalar float/double to signed/unsigned integer with truncation.
630
CVTTS2SI,
631
CVTTS2UI,
632
CVTTS2SI_SAE,
633
CVTTS2UI_SAE,
634
635
// Vector signed/unsigned integer to float/double.
636
CVTSI2P,
637
CVTUI2P,
638
639
// Masked versions of above. Used for v2f64->v4f32.
640
// SRC, PASSTHRU, MASK
641
MCVTP2SI,
642
MCVTP2UI,
643
MCVTTP2SI,
644
MCVTTP2UI,
645
MCVTSI2P,
646
MCVTUI2P,
647
648
// Vector float to bfloat16.
649
// Convert TWO packed single data to one packed BF16 data
650
CVTNE2PS2BF16,
651
// Convert packed single data to packed BF16 data
652
CVTNEPS2BF16,
653
// Masked version of above.
654
// SRC, PASSTHRU, MASK
655
MCVTNEPS2BF16,
656
657
// Dot product of BF16 pairs accumulated into
// packed single precision.
659
DPBF16PS,
660
661
// A stack checking function call. On Windows it's the _chkstk call.
662
DYN_ALLOCA,
663
664
// For allocating variable amounts of stack space when using
// segmented stacks. Checks if the current stacklet has enough space, and
// falls back to heap allocation if not.
667
SEG_ALLOCA,
668
669
// For allocating stack space when using stack clash protector.
670
// Allocation is performed by block, and each block is probed.
671
PROBED_ALLOCA,
672
673
// Memory barriers.
674
MFENCE,
675
676
// Get a random integer and indicate whether it is valid in CF.
677
RDRAND,
678
679
// Get a NIST SP800-90B & C compliant random integer and
680
// indicate whether it is valid in CF.
681
RDSEED,
682
683
// Protection keys
684
// RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
685
// WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
686
// value for ECX.
687
RDPKRU,
688
WRPKRU,
689
690
// SSE42 string comparisons.
691
// These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
// will emit one or two instructions based on which results are used. If
// flags and index/mask are both used, this allows us to use a single
// instruction since we won't have to pick an opcode for flags. Instead we
// can rely on the DAG to CSE everything and decide at isel.
696
PCMPISTR,
697
PCMPESTR,
698
699
// Test if in transactional execution.
700
XTEST,
701
702
// Conversions between float and half-float.
703
CVTPS2PH,
704
CVTPS2PH_SAE,
705
CVTPH2PS,
706
CVTPH2PS_SAE,
707
708
// Masked version of above.
709
// SRC, RND, PASSTHRU, MASK
710
MCVTPS2PH,
711
MCVTPS2PH_SAE,
712
713
// Galois Field Arithmetic Instructions
714
GF2P8AFFINEINVQB,
715
GF2P8AFFINEQB,
716
GF2P8MULB,
717
718
// LWP insert record.
719
LWPINS,
720
721
// User level wait
722
UMWAIT,
723
TPAUSE,
724
725
// Enqueue Stores Instructions
726
ENQCMD,
727
ENQCMDS,
728
729
// For avx512-vp2intersect
730
VP2INTERSECT,
731
732
// User level interrupts - testui
733
TESTUI,
734
735
// Perform an FP80 add after changing precision control in FPCW.
736
FP80_ADD,
737
738
// Conditional compare instructions
739
CCMP,
740
CTEST,
741
742
/// X86 strict FP compare instructions.
743
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
744
STRICT_FCMPS,
745
746
// Vector packed double/float comparison.
747
STRICT_CMPP,
748
749
/// Vector comparison generating mask bits for fp and
750
/// integer signed and unsigned data types.
751
STRICT_CMPM,
752
753
// Vector float/double to signed/unsigned integer with truncation.
754
STRICT_CVTTP2SI,
755
STRICT_CVTTP2UI,
756
757
// Vector FP extend.
758
STRICT_VFPEXT,
759
760
// Vector FP round.
761
STRICT_VFPROUND,
762
763
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
764
// Also used by the legacy (V)ROUND intrinsics where we mask out the
765
// scaling part of the immediate.
766
STRICT_VRNDSCALE,
767
768
// Vector signed/unsigned integer to float/double.
769
STRICT_CVTSI2P,
770
STRICT_CVTUI2P,
771
772
// Strict FMA nodes.
773
STRICT_FNMADD,
774
STRICT_FMSUB,
775
STRICT_FNMSUB,
776
777
// Conversions between float and half-float.
778
STRICT_CVTPS2PH,
779
STRICT_CVTPH2PS,
780
781
// Perform an FP80 add after changing precision control in FPCW.
782
STRICT_FP80_ADD,
783
784
// WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
785
// non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
786
787
// Compare and swap.
788
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
789
LCMPXCHG8_DAG,
790
LCMPXCHG16_DAG,
791
LCMPXCHG16_SAVE_RBX_DAG,
792
793
/// LOCK-prefixed arithmetic read-modify-write instructions.
794
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
795
LADD,
796
LSUB,
797
LOR,
798
LXOR,
799
LAND,
800
LBTS,
801
LBTC,
802
LBTR,
803
LBTS_RM,
804
LBTC_RM,
805
LBTR_RM,
806
807
/// RAO arithmetic instructions.
808
/// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
809
AADD,
810
AOR,
811
AXOR,
812
AAND,
813
814
// Load, scalar_to_vector, and zero extend.
815
VZEXT_LOAD,
816
817
// extract_vector_elt, store.
818
VEXTRACT_STORE,
819
820
// scalar broadcast from memory.
821
VBROADCAST_LOAD,
822
823
// subvector broadcast from memory.
824
SUBV_BROADCAST_LOAD,
825
826
// Store FP control word into i16 memory.
827
FNSTCW16m,
828
829
// Load FP control word from i16 memory.
830
FLDCW16m,
831
832
// Store x87 FPU environment into memory.
833
FNSTENVm,
834
835
// Load x87 FPU environment from memory.
836
FLDENVm,
837
838
/// This instruction implements FP_TO_SINT with the
839
/// integer destination in memory and a FP reg source. This corresponds
840
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
841
/// has two inputs (token chain and address) and two outputs (int value
842
/// and token chain). Memory VT specifies the type to store to.
843
FP_TO_INT_IN_MEM,
844
845
/// This instruction implements SINT_TO_FP with the
846
/// integer source in memory and FP reg result. This corresponds to the
847
/// X86::FILD*m instructions. It has two inputs (token chain and address)
848
/// and two outputs (FP value and token chain). The integer source type is
849
/// specified by the memory VT.
850
FILD,
851
852
/// This instruction implements a fp->int store from FP stack
853
/// slots. This corresponds to the fist instruction. It takes a
854
/// chain operand, value to store, address, and glue. The memory VT
855
/// specifies the type to store as.
856
FIST,
857
858
/// This instruction implements an extending load to FP stack slots.
859
/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
860
/// operand, and ptr to load from. The memory VT specifies the type to
861
/// load from.
862
FLD,
863
864
/// This instruction implements a truncating store from FP stack
865
/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
866
/// chain operand, value to store, address, and glue. The memory VT
867
/// specifies the type to store as.
868
FST,
869
870
/// These instructions grab the address of the next argument
871
/// from a va_list. (reads and modifies the va_list in memory)
872
VAARG_64,
873
VAARG_X32,
874
875
// Vector truncating store with unsigned/signed saturation
876
VTRUNCSTOREUS,
877
VTRUNCSTORES,
878
// Vector truncating masked store with unsigned/signed saturation
879
VMTRUNCSTOREUS,
880
VMTRUNCSTORES,
881
882
// X86 specific gather and scatter
883
MGATHER,
884
MSCATTER,
885
886
// Key locker nodes that produce flags.
887
AESENC128KL,
888
AESDEC128KL,
889
AESENC256KL,
890
AESDEC256KL,
891
AESENCWIDE128KL,
892
AESDECWIDE128KL,
893
AESENCWIDE256KL,
894
AESDECWIDE256KL,
895
896
/// Compare and Add if Condition is Met. Compare the value in operand 2 with
/// the value in memory at operand 1. If the condition in operand 4 is met, add
/// the value of operand 3 to m32 and write the new value to operand 1. Operand
/// 2 is always updated with the original value from operand 1.
900
CMPCCXADD,
901
902
// Save xmm argument registers to the stack, according to %al. An operator
903
// is needed so that this can be expanded with control flow.
904
VASTART_SAVE_XMM_REGS,
905
906
// Conditional load/store instructions
907
CLOAD,
908
CSTORE,
909
910
// WARNING: Do not add anything at the end unless you want the node to
// have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be treated as target memory ops!
913
};
914
} // end namespace X86ISD
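// For illustration, these opcodes are used like any other SelectionDAG opcode
// when the X86 lowering code builds nodes, e.g. (sketch):
//   SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, VT, Mask, Val); // ~Mask & Val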
915
916
namespace X86 {
917
/// The current rounding mode is represented in bits 11:10 of FPSR. These
/// values are the same as the corresponding rounding-mode constants used
/// in glibc.
920
enum RoundingMode {
921
rmToNearest = 0, // FE_TONEAREST
922
rmDownward = 1 << 10, // FE_DOWNWARD
923
rmUpward = 2 << 10, // FE_UPWARD
924
rmTowardZero = 3 << 10, // FE_TOWARDZERO
925
rmMask = 3 << 10 // Bit mask selecting rounding mode
926
};
927
}
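// A minimal usage sketch (assuming CW holds a raw FP control/status word that
// has already been read): the rounding field can be classified with the mask
// above, e.g.
//   bool RoundsTowardZero = (CW & X86::rmMask) == X86::rmTowardZero;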
928
929
/// Define some predicates that are used for node matching.
930
namespace X86 {
931
/// Returns true if Elt is a constant zero or floating point constant +0.0.
932
bool isZeroNode(SDValue Elt);
933
934
/// Returns true if the given offset can
/// fit into the displacement field of the instruction.
936
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
937
bool hasSymbolicDisplacement);
938
939
/// Determines whether the callee is required to pop its
940
/// own arguments. Callee pop is necessary to support tail calls.
941
bool isCalleePop(CallingConv::ID CallingConv,
942
bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
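/// For example, the 32-bit stdcall and fastcall conventions require the callee
/// to pop its own arguments (via "ret imm16"), while the default C convention
/// leaves stack cleanup to the caller.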
943
944
/// If Op is a constant whose elements are all the same constant or
945
/// undefined, return true and return the constant value in \p SplatVal.
946
/// If we have undef bits that don't cover an entire element, we treat these
947
/// as zero if AllowPartialUndefs is set, else we fail and return false.
948
bool isConstantSplat(SDValue Op, APInt &SplatVal,
949
bool AllowPartialUndefs = true);
950
951
/// Check if Op is a load operation that could be folded into some other x86
952
/// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
953
bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
954
bool AssumeSingleUse = false);
955
956
/// Check if Op is a load operation that could be folded into a vector splat
957
/// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
958
bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
959
const X86Subtarget &Subtarget,
960
bool AssumeSingleUse = false);
961
962
/// Check if Op is a value that could be used to fold a store into some
963
/// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
964
bool mayFoldIntoStore(SDValue Op);
965
966
/// Check if Op is an operation that could be folded into a zero extend x86
967
/// instruction.
968
bool mayFoldIntoZeroExtend(SDValue Op);
969
970
/// True if the target supports the extended frame for async Swift
971
/// functions.
972
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
973
const MachineFunction &MF);
974
} // end namespace X86
975
976
//===--------------------------------------------------------------------===//
977
// X86 Implementation of the TargetLowering interface
978
class X86TargetLowering final : public TargetLowering {
979
public:
980
explicit X86TargetLowering(const X86TargetMachine &TM,
981
const X86Subtarget &STI);
982
983
unsigned getJumpTableEncoding() const override;
984
bool useSoftFloat() const override;
985
986
void markLibCallAttributes(MachineFunction *MF, unsigned CC,
987
ArgListTy &Args) const override;
988
989
MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
990
return MVT::i8;
991
}
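// x86 variable shift counts are taken from the 8-bit CL register (e.g.
// "shl %cl, %eax"), which is why the shift-amount type is i8.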
992
993
const MCExpr *
994
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
995
const MachineBasicBlock *MBB, unsigned uid,
996
MCContext &Ctx) const override;
997
998
/// Returns relocation base for the given PIC jumptable.
999
SDValue getPICJumpTableRelocBase(SDValue Table,
1000
SelectionDAG &DAG) const override;
1001
const MCExpr *
1002
getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
1003
unsigned JTI, MCContext &Ctx) const override;
1004
1005
/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
1009
uint64_t getByValTypeAlignment(Type *Ty,
1010
const DataLayout &DL) const override;
1011
1012
EVT getOptimalMemOpType(const MemOp &Op,
1013
const AttributeList &FuncAttributes) const override;
1014
1015
/// Returns true if it's safe to use load / store of the
1016
/// specified type to expand memcpy / memset inline. This is mostly true
1017
/// for all types except for some special cases. For example, on X86
1018
/// targets without SSE2 f64 load / store are done with fldl / fstpl which
1019
/// also does type conversion. Note the specified type doesn't have to be
1020
/// legal as the hook is used before type legalization.
1021
bool isSafeMemOpType(MVT VT) const override;
1022
1023
bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1024
1025
/// Returns true if the target allows unaligned memory accesses of the
1026
/// specified type. Returns whether it is "fast" in the last argument.
1027
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1028
MachineMemOperand::Flags Flags,
1029
unsigned *Fast) const override;
1030
1031
/// This function returns true if the memory access is aligned or if the
1032
/// target allows this specific unaligned memory access. If the access is
1033
/// allowed, the optional final parameter returns a relative speed of the
1034
/// access (as defined by the target).
1035
bool allowsMemoryAccess(
1036
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1037
Align Alignment,
1038
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1039
unsigned *Fast = nullptr) const override;
1040
1041
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1042
const MachineMemOperand &MMO,
1043
unsigned *Fast) const {
1044
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
1045
MMO.getAlign(), MMO.getFlags(), Fast);
1046
}
1047
1048
/// Provide custom lowering hooks for some operations.
1049
///
1050
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1051
1052
/// Replace the results of node with an illegal result
1053
/// type with new values built out of custom code.
1054
///
1055
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1056
SelectionDAG &DAG) const override;
1057
1058
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1059
1060
bool preferABDSToABSWithNSW(EVT VT) const override;
1061
1062
bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
1063
EVT ExtVT) const override;
1064
1065
bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
1066
EVT VT) const override;
1067
1068
/// Return true if the target has native support for
1069
/// the specified value type and it is 'desirable' to use the type for the
1070
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1071
/// instruction encodings are longer and some i16 instructions are slow.
1072
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1073
1074
/// Return true if the target has native support for the
1075
/// specified value type and it is 'desirable' to use the type. e.g. On x86
1076
/// i16 is legal, but undesirable since i16 instruction encodings are longer
1077
/// and some i16 instructions are slow.
1078
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1079
1080
/// Return the preferred fold type: Abs if this is a vector, AddAnd if it's an
/// integer, None otherwise.
1082
TargetLowering::AndOrSETCCFoldKind
1083
isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
1084
const SDNode *SETCC0,
1085
const SDNode *SETCC1) const override;
1086
1087
/// Return the newly negated expression if the cost is not expensive and
1088
/// set the cost in \p Cost to indicate that if it is cheaper or neutral to
1089
/// do the negation.
1090
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1091
bool LegalOperations, bool ForCodeSize,
1092
NegatibleCost &Cost,
1093
unsigned Depth) const override;
1094
1095
MachineBasicBlock *
1096
EmitInstrWithCustomInserter(MachineInstr &MI,
1097
MachineBasicBlock *MBB) const override;
1098
1099
/// This method returns the name of a target specific DAG node.
1100
const char *getTargetNodeName(unsigned Opcode) const override;
1101
1102
/// Do not merge vector stores after legalization because that may conflict
1103
/// with x86-specific store splitting optimizations.
1104
bool mergeStoresAfterLegalization(EVT MemVT) const override {
1105
return !MemVT.isVector();
1106
}
1107
1108
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1109
const MachineFunction &MF) const override;
1110
1111
bool isCheapToSpeculateCttz(Type *Ty) const override;
1112
1113
bool isCheapToSpeculateCtlz(Type *Ty) const override;
1114
1115
bool isCtlzFast() const override;
1116
1117
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1118
// If the pair to store is a mixture of float and int values, we will
1119
// save two bitwise instructions and one float-to-int instruction and
1120
// increase one store instruction. There is potentially a more
1121
// significant benefit because it avoids the float->int domain switch
1122
// for the input value. So it is more likely a win.
1123
if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1124
(LTy.isInteger() && HTy.isFloatingPoint()))
1125
return true;
1126
// If the pair only contains int values, we will save two bitwise
1127
// instructions and increase one store instruction (costing one more
1128
// store buffer). Since the benefit is less clear, we leave
// such pairs out until we have a test case to prove it is a win.
1130
return false;
1131
}
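// For example (a sketch): storing a {float, int} pair as one merged i64 would
// need a float->GPR move (movd) plus shift/or to combine the halves, whereas
// two separate scalar stores avoid that domain crossing entirely.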
1132
1133
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1134
1135
bool hasAndNotCompare(SDValue Y) const override;
1136
1137
bool hasAndNot(SDValue Y) const override;
1138
1139
bool hasBitTest(SDValue X, SDValue Y) const override;
1140
1141
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1142
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1143
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1144
SelectionDAG &DAG) const override;
1145
1146
unsigned preferedOpcodeForCmpEqPiecesOfOperand(
1147
EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
1148
const APInt &ShiftOrRotateAmt,
1149
const std::optional<APInt> &AndMask) const override;
1150
1151
bool preferScalarizeSplat(SDNode *N) const override;
1152
1153
CondMergingParams
1154
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1155
const Value *Rhs) const override;
1156
1157
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1158
CombineLevel Level) const override;
1159
1160
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1161
1162
bool
1163
shouldTransformSignedTruncationCheck(EVT XVT,
1164
unsigned KeptBits) const override {
1165
// For vectors, we don't have a preference.
1166
if (XVT.isVector())
1167
return false;
1168
1169
auto VTIsOk = [](EVT VT) -> bool {
1170
return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1171
VT == MVT::i64;
1172
};
1173
1174
// We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1175
// XVT will be larger than KeptBitsVT.
1176
MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1177
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1178
}
1179
1180
ShiftLegalizationStrategy
1181
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1182
unsigned ExpansionFactor) const override;
1183
1184
bool shouldSplatInsEltVarIndex(EVT VT) const override;
1185
1186
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1187
// Converting to sat variants holds little benefit on X86 as we will just
1188
// need to saturate the value back using fp arithmetic.
1189
return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1190
}
1191
1192
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1193
return VT.isScalarInteger();
1194
}
1195
1196
/// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1197
MVT hasFastEqualityCompare(unsigned NumBits) const override;
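/// For illustration (a sketch of the kind of sequence this enables, not code
/// from this file): a 16-byte equality compare can be lowered to
///   movdqu (%rdi), %xmm0
///   pcmpeqb (%rsi), %xmm0
///   pmovmskb %xmm0, %eax
///   cmp $0xffff, %eax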
1198
1199
/// Return the value type to use for ISD::SETCC.
1200
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1201
EVT VT) const override;
1202
1203
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1204
const APInt &DemandedElts,
1205
TargetLoweringOpt &TLO) const override;
1206
1207
/// Determine which of the bits specified in Mask are known to be either
1208
/// zero or one and return them in the KnownZero/KnownOne bitsets.
1209
void computeKnownBitsForTargetNode(const SDValue Op,
1210
KnownBits &Known,
1211
const APInt &DemandedElts,
1212
const SelectionDAG &DAG,
1213
unsigned Depth = 0) const override;
1214
1215
/// Determine the number of bits in the operation that are sign bits.
1216
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1217
const APInt &DemandedElts,
1218
const SelectionDAG &DAG,
1219
unsigned Depth) const override;
1220
1221
bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1222
const APInt &DemandedElts,
1223
APInt &KnownUndef,
1224
APInt &KnownZero,
1225
TargetLoweringOpt &TLO,
1226
unsigned Depth) const override;
1227
1228
bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1229
const APInt &DemandedElts,
1230
unsigned MaskIndex,
1231
TargetLoweringOpt &TLO,
1232
unsigned Depth) const;
1233
1234
bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1235
const APInt &DemandedBits,
1236
const APInt &DemandedElts,
1237
KnownBits &Known,
1238
TargetLoweringOpt &TLO,
1239
unsigned Depth) const override;
1240
1241
SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1242
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1243
SelectionDAG &DAG, unsigned Depth) const override;
1244
1245
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1246
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1247
bool PoisonOnly, unsigned Depth) const override;
1248
1249
bool canCreateUndefOrPoisonForTargetNode(
1250
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1251
bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1252
1253
bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1254
APInt &UndefElts, const SelectionDAG &DAG,
1255
unsigned Depth) const override;
1256
1257
bool isTargetCanonicalConstantNode(SDValue Op) const override {
1258
// Peek through bitcasts/extracts/inserts to see if we have a broadcast
1259
// vector from memory.
1260
while (Op.getOpcode() == ISD::BITCAST ||
1261
Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1262
(Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1263
Op.getOperand(0).isUndef()))
1264
Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1265
1266
return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1267
TargetLowering::isTargetCanonicalConstantNode(Op);
1268
}
1269
1270
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1271
1272
SDValue unwrapAddress(SDValue N) const override;
1273
1274
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1275
1276
bool ExpandInlineAsm(CallInst *CI) const override;
1277
1278
ConstraintType getConstraintType(StringRef Constraint) const override;
1279
1280
/// Examine constraint string and operand type and determine a weight value.
1281
/// The operand object must already have been set up with the operand type.
1282
ConstraintWeight
1283
getSingleConstraintMatchWeight(AsmOperandInfo &Info,
1284
const char *Constraint) const override;
1285
1286
const char *LowerXConstraint(EVT ConstraintVT) const override;
1287
1288
/// Lower the specified operand into the Ops vector. If it is invalid, don't
1289
/// add anything to Ops. If hasMemory is true it means one of the asm
1290
/// constraints of the inline asm instruction being processed is 'm'.
1291
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1292
std::vector<SDValue> &Ops,
1293
SelectionDAG &DAG) const override;
1294
1295
InlineAsm::ConstraintCode
1296
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1297
if (ConstraintCode == "v")
1298
return InlineAsm::ConstraintCode::v;
1299
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1300
}
1301
1302
/// Handle Lowering flag assembly outputs.
1303
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1304
const SDLoc &DL,
1305
const AsmOperandInfo &Constraint,
1306
SelectionDAG &DAG) const override;
1307
1308
/// Given a physical register constraint
1309
/// (e.g. {edx}), return the register number and the register class for the
1310
/// register. This should only be used for C_Register constraints. On
1311
/// error, this returns a register number of 0.
1312
std::pair<unsigned, const TargetRegisterClass *>
1313
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1314
StringRef Constraint, MVT VT) const override;
1315
1316
/// Return true if the addressing mode represented
1317
/// by AM is legal for this target, for a load/store of the specified type.
1318
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1319
Type *Ty, unsigned AS,
1320
Instruction *I = nullptr) const override;
1321
1322
bool addressingModeSupportsTLS(const GlobalValue &GV) const override;
1323
1324
/// Return true if the specified immediate is legal
1325
/// icmp immediate, that is the target has icmp instructions which can
1326
/// compare a register against the immediate without having to materialize
1327
/// the immediate into a register.
1328
bool isLegalICmpImmediate(int64_t Imm) const override;
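/// For example, "cmp $0x12345678, %rax" is directly encodable (32-bit
/// immediates are sign-extended), but a full 64-bit immediate has no cmp
/// encoding and would first have to be materialized with movabs.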
1329
1330
/// Return true if the specified immediate is legal
1331
/// add immediate, that is the target has add instructions which can
1332
/// add a register and the immediate without having to materialize
1333
/// the immediate into a register.
1334
bool isLegalAddImmediate(int64_t Imm) const override;
1335
1336
bool isLegalStoreImmediate(int64_t Imm) const override;
1337
1338
/// This is used to enable splatted operand transforms for vector shifts
1339
/// and vector funnel shifts.
1340
bool isVectorShiftByScalarCheap(Type *Ty) const override;
1341
1342
/// Add x86-specific opcodes to the default list.
1343
bool isBinOp(unsigned Opcode) const override;
1344
1345
/// Returns true if the opcode is a commutative binary operation.
1346
bool isCommutativeBinOp(unsigned Opcode) const override;
1347
1348
/// Return true if it's free to truncate a value of
1349
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1350
/// register EAX to i16 by referencing its sub-register AX.
1351
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1352
bool isTruncateFree(EVT VT1, EVT VT2) const override;
1353
1354
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1355
1356
/// Return true if any actual instruction that defines a
1357
/// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1358
/// register. This does not necessarily include registers defined in
1359
/// unknown ways, such as incoming arguments, or copies from unknown
1360
/// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1361
/// does not necessarily apply to truncate instructions. e.g. on x86-64,
1362
/// all instructions that define 32-bit values implicit zero-extend the
1363
/// result out to 64 bits.
1364
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1365
bool isZExtFree(EVT VT1, EVT VT2) const override;
1366
bool isZExtFree(SDValue Val, EVT VT2) const override;
1367
1368
bool shouldSinkOperands(Instruction *I,
1369
SmallVectorImpl<Use *> &Ops) const override;
1370
bool shouldConvertPhiType(Type *From, Type *To) const override;
1371
1372
/// Return true if folding a vector load into ExtVal (a sign, zero, or any
1373
/// extend node) is profitable.
1374
bool isVectorLoadExtDesirable(SDValue) const override;
1375
1376
/// Return true if an FMA operation is faster than a pair of fmul and fadd
1377
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
1378
/// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1379
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1380
EVT VT) const override;
1381
1382
/// Return true if it's profitable to narrow operations of type SrcVT to
1383
/// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1384
/// from i32 to i16.
1385
bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;
1386
1387
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1388
EVT VT) const override;
1389
1390
/// Given an intrinsic, checks if on the target the intrinsic will need to map
1391
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1392
/// true and stores the intrinsic information into the IntrinsicInfo that was
1393
/// passed to the function.
1394
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1395
MachineFunction &MF,
1396
unsigned Intrinsic) const override;
1397
1398
/// Returns true if the target can instruction select the
1399
/// specified FP immediate natively. If false, the legalizer will
1400
/// materialize the FP immediate as a load from a constant pool.
1401
bool isFPImmLegal(const APFloat &Imm, EVT VT,
1402
bool ForCodeSize) const override;
1403
1404
/// Targets can use this to indicate that they only support *some*
1405
/// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1406
/// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1407
/// be legal.
1408
bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1409
1410
/// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1411
/// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1412
/// constant pool entry.
1413
bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1414
1415
/// Returns true if lowering to a jump table is allowed.
1416
bool areJTsAllowed(const Function *Fn) const override;
1417
1418
MVT getPreferredSwitchConditionType(LLVMContext &Context,
1419
EVT ConditionVT) const override;
1420
1421
/// If true, then instruction selection should
1422
/// seek to shrink the FP constant of the specified type to a smaller type
1423
/// in order to save space and/or reduce runtime.
1424
bool ShouldShrinkFPConstant(EVT VT) const override;
1425
1426
/// Return true if we believe it is correct and profitable to reduce the
1427
/// load node to a smaller type.
1428
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1429
EVT NewVT) const override;
1430
1431
/// Return true if the specified scalar FP type is computed in an SSE
1432
/// register, not on the X87 floating point stack.
1433
bool isScalarFPTypeInSSEReg(EVT VT) const;
1434
1435
/// Returns true if it is beneficial to convert a load of a constant
1436
/// to just the constant itself.
1437
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1438
Type *Ty) const override;
1439
1440
bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1441
1442
bool convertSelectOfConstantsToMath(EVT VT) const override;
1443
1444
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1445
SDValue C) const override;
1446
1447
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1448
/// with this index.
1449
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1450
unsigned Index) const override;
1451
1452
/// Scalar ops always have equal or better analysis/performance/power than
1453
/// the vector equivalent, so this always makes sense if the scalar op is
1454
/// supported.
1455
bool shouldScalarizeBinop(SDValue) const override;
1456
1457
/// Extract of a scalar FP value from index 0 of a vector is free.
1458
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1459
EVT EltVT = VT.getScalarType();
1460
return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1461
}
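// e.g. extracting element 0 of a v4f32 or v2f64 is free because the scalar
// already lives in the low lane of the XMM register.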
1462
1463
/// Overflow nodes should get combined/lowered to optimal instructions
1464
/// (they should allow eliminating explicit compares by getting flags from
1465
/// math ops).
1466
bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1467
bool MathUsed) const override;
1468
1469
bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1470
unsigned AddrSpace) const override {
1471
// If we can replace more than 2 scalar stores, there will be a reduction
1472
// in instructions even after we add a vector constant load.
1473
return IsZero || NumElem > 2;
1474
}
1475
1476
bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1477
const SelectionDAG &DAG,
1478
const MachineMemOperand &MMO) const override;
1479
1480
Register getRegisterByName(const char* RegName, LLT VT,
1481
const MachineFunction &MF) const override;
1482
1483
/// If a physical register, this returns the register that receives the
1484
/// exception address on entry to an EH pad.
1485
Register
1486
getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1487
1488
/// If a physical register, this returns the register that receives the
1489
/// exception typeid on entry to a landing pad.
1490
Register
1491
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1492
1493
bool needsFixedCatchObjects() const override;
1494
1495
/// This method returns a target specific FastISel object,
1496
/// or null if the target does not support "fast" ISel.
1497
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1498
const TargetLibraryInfo *libInfo) const override;
1499
1500
/// If the target has a standard location for the stack protector cookie,
1501
/// returns the address of that location. Otherwise, returns nullptr.
1502
Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1503
1504
bool useLoadStackGuardNode() const override;
1505
bool useStackGuardXorFP() const override;
1506
void insertSSPDeclarations(Module &M) const override;
1507
Value *getSDagStackGuard(const Module &M) const override;
1508
Function *getSSPStackGuardCheck(const Module &M) const override;
1509
SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1510
const SDLoc &DL) const override;
1511
1512
1513
/// Return true if the target stores SafeStack pointer at a fixed offset in
1514
/// some non-standard address space, and populates the address space and
1515
/// offset as appropriate.
1516
Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1517
1518
std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1519
SDValue Chain, SDValue Pointer,
1520
MachinePointerInfo PtrInfo,
1521
Align Alignment,
1522
SelectionDAG &DAG) const;
1523
1524
/// Customize the preferred legalization strategy for certain types.
1525
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1526
1527
bool softPromoteHalfType() const override { return true; }
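// In short (a sketch of the legalization strategy this selects): f16 values
// are kept as i16 for storage/moves and individually promoted to f32 to do
// arithmetic, then truncated back to f16.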
1528
1529
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1530
EVT VT) const override;
1531
1532
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1533
CallingConv::ID CC,
1534
EVT VT) const override;
1535
1536
unsigned getVectorTypeBreakdownForCallingConv(
1537
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1538
unsigned &NumIntermediates, MVT &RegisterVT) const override;
1539
1540
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1541
1542
bool supportSwiftError() const override;
1543
1544
bool supportKCFIBundles() const override { return true; }
1545
1546
MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1547
MachineBasicBlock::instr_iterator &MBBI,
1548
const TargetInstrInfo *TII) const override;
1549
1550
bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1551
bool hasInlineStackProbe(const MachineFunction &MF) const override;
1552
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1553
1554
unsigned getStackProbeSize(const MachineFunction &MF) const;
1555
1556
bool hasVectorBlend() const override { return true; }
1557
1558
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1559
1560
bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1561
unsigned OpNo) const override;
1562
1563
SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1564
MachineMemOperand *MMO, SDValue &NewLoad,
1565
SDValue Ptr, SDValue PassThru,
1566
SDValue Mask) const override;
1567
SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1568
MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
1569
SDValue Mask) const override;
1570
1571
/// Lower interleaved load(s) into target specific
1572
/// instructions/intrinsics.
1573
bool lowerInterleavedLoad(LoadInst *LI,
1574
ArrayRef<ShuffleVectorInst *> Shuffles,
1575
ArrayRef<unsigned> Indices,
1576
unsigned Factor) const override;
1577
1578
/// Lower interleaved store(s) into target specific
1579
/// instructions/intrinsics.
1580
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1581
unsigned Factor) const override;
1582
1583
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1584
int JTI, SelectionDAG &DAG) const override;
1585
1586
Align getPrefLoopAlignment(MachineLoop *ML) const override;
1587
1588
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1589
if (VT == MVT::f80)
1590
return EVT::getIntegerVT(Context, 96);
1591
return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1592
}
1593
1594
protected:
1595
std::pair<const TargetRegisterClass *, uint8_t>
1596
findRepresentativeClass(const TargetRegisterInfo *TRI,
1597
MVT VT) const override;
1598
1599
private:
1600
/// Keep a reference to the X86Subtarget around so that we can
1601
/// make the right decision when generating code for different targets.
1602
const X86Subtarget &Subtarget;
1603
1604
/// A list of legal FP immediates.
1605
std::vector<APFloat> LegalFPImmediates;
1606
1607
/// Indicate that this x86 target can instruction
1608
/// select the specified FP immediate natively.
1609
void addLegalFPImmediate(const APFloat& Imm) {
1610
LegalFPImmediates.push_back(Imm);
1611
}
1612
1613
SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1614
CallingConv::ID CallConv, bool isVarArg,
1615
const SmallVectorImpl<ISD::InputArg> &Ins,
1616
const SDLoc &dl, SelectionDAG &DAG,
1617
SmallVectorImpl<SDValue> &InVals,
1618
uint32_t *RegMask) const;
1619
SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1620
const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1621
const SDLoc &dl, SelectionDAG &DAG,
1622
const CCValAssign &VA, MachineFrameInfo &MFI,
1623
unsigned i) const;
1624
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1625
const SDLoc &dl, SelectionDAG &DAG,
1626
const CCValAssign &VA,
1627
ISD::ArgFlagsTy Flags, bool isByval) const;
1628
1629
// Call lowering helpers.
1630
1631
/// Check whether the call is eligible for tail call optimization. Targets
1632
/// that want to do tail call optimization should implement this function.
1633
bool IsEligibleForTailCallOptimization(
1634
TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
1635
SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
1636
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1637
SDValue Chain, bool IsTailCall,
1638
bool Is64Bit, int FPDiff,
1639
const SDLoc &dl) const;
1640
1641
unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1642
SelectionDAG &DAG) const;
1643
1644
unsigned getAddressSpace() const;
1645
1646
SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1647
SDValue &Chain) const;
1648
SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1649
1650
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1651
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1652
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1653
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1654
1655
unsigned getGlobalWrapperKind(const GlobalValue *GV,
1656
const unsigned char OpFlags) const;
1657
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1658
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1659
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1660
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1661
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1662
1663
/// Creates target global address or external symbol nodes for calls or
1664
/// other uses.
1665
SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1666
bool ForCall) const;
1667
1668
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
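    // Split CSR is restricted to CXX_FAST_TLS functions that cannot unwind,
    // presumably because the entry/exit copies created by insertCopiesSplitCSR
    // take the place of the usual prologue/epilogue saves.
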
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
    ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;
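    // (Presumably: true when the access is wider than the widest natively
    // supported atomic, so it must be lowered via CMPXCHG8B/CMPXCHG16B.)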

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to lower a cascaded pair of CMOV pseudo instructions
    /// (the second CMOV consuming the first's result) into branch-based
    /// control flow.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
                                              MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;
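    // For example (illustrative): lowering (setcc i32 %a, %b, slt) would
    // typically return an X86ISD::CMP of the two operands as the flags value
    // and set X86CC to a target constant holding X86::COND_L.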

    bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                             SDValue IntPow2) const override;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

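    // Illustrative math: both estimates are refined with Newton-Raphson steps,
    // roughly x1 = x0 * (1.5 - 0.5 * d * x0 * x0) for rsqrt(d) and
    // x1 = x0 * (2 - d * x0) for 1/d; RefinementSteps selects how many
    // iterations are emitted.
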
    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
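    // For example (illustrative): given x/d + y/d + z/d and a threshold of 3,
    // the reciprocal r = 1.0/d is computed once and the terms become
    // x*r, y*r and z*r.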

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                    SDValue V2) const;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };
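
  // For example (illustrative), a DAG combine can match these nodes with the
  // usual cast machinery:
  //   if (auto *G = dyn_cast<X86MaskedGatherSDNode>(N)) {
  //     SDValue Index = G->getIndex(); // operand #4
  //     SDValue Mask = G->getMask();   // operand #2
  //   }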

  /// Generate unpacklo/unpackhi shuffle mask.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);
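  // For example (illustrative), for MVT::v4i32 this produces
  //   Lo && !Unary --> <0, 4, 1, 5>  (interleave the low halves of two inputs)
  //   Lo &&  Unary --> <0, 0, 1, 1>  (interleave the low half of one input)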

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H