//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
21
class X86Subtarget;
22
class X86TargetMachine;
23
24
namespace X86ISD {
25
// X86 Specific DAG Nodes
26
enum NodeType : unsigned {
27
// Start the numbering where the builtin ops leave off.
28
FIRST_NUMBER = ISD::BUILTIN_OP_END,
29
30
/// Bit scan forward.
31
BSF,
32
/// Bit scan reverse.
33
BSR,
34
35
/// X86 funnel/double shift i16 instructions. These correspond to
36
/// X86::SHLDW and X86::SHRDW instructions which have different amt
37
/// modulo rules from generic funnel shifts.
38
/// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
39
FSHL,
40
FSHR,
41
42
/// Bitwise logical AND of floating point values. This corresponds
43
/// to X86::ANDPS or X86::ANDPD.
44
FAND,
45
46
/// Bitwise logical OR of floating point values. This corresponds
47
/// to X86::ORPS or X86::ORPD.
48
FOR,
49
50
/// Bitwise logical XOR of floating point values. This corresponds
51
/// to X86::XORPS or X86::XORPD.
52
FXOR,
53
54
/// Bitwise logical ANDNOT of floating point values. This
55
/// corresponds to X86::ANDNPS or X86::ANDNPD.
56
FANDN,
57
58
/// These operations represent an abstract X86 call
59
/// instruction, which includes a bunch of information. In particular the
60
/// operands of these nodes are:
61
///
62
/// #0 - The incoming token chain
63
/// #1 - The callee
64
/// #2 - The number of arg bytes the caller pushes on the stack.
65
/// #3 - The number of arg bytes the callee pops off the stack.
66
/// #4 - The value to pass in AL/AX/EAX (optional)
67
/// #5 - The value to pass in DL/DX/EDX (optional)
68
///
69
/// The result values of these nodes are:
70
///
71
/// #0 - The outgoing token chain
72
/// #1 - The first register result value (optional)
73
/// #2 - The second register result value (optional)
74
///
75
CALL,
76
77
/// Same as call except it adds the NoTrack prefix.
78
NT_CALL,
79
80
// Pseudo for an ObjC call that gets emitted together with a special
81
// marker instruction.
82
CALL_RVMARKER,
83
84
/// X86 compare and logical compare instructions.
85
CMP,
86
FCMP,
87
COMI,
88
UCOMI,
89
90
/// X86 bit-test instructions.
91
BT,
92
93
/// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94
/// operand, usually produced by a CMP instruction.
95
SETCC,
96
97
/// X86 Select
98
SELECTS,
99
100
// Same as SETCC except it's materialized with a sbb and the value is all
101
// ones or all zeros.
102
SETCC_CARRY, // R = carry_bit ? ~0 : 0
103
104
/// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105
/// Operands are two FP values to compare; result is a mask of
106
/// 0s or 1s. Generally DTRT for C/C++ with NaNs.
107
FSETCC,
108
109
/// X86 FP SETCC, similar to above, but with output as an i1 mask and
110
/// a version with SAE.
111
FSETCCM,
112
FSETCCM_SAE,
113
114
/// X86 conditional moves. Operand 0 and operand 1 are the two values
115
/// to select from. Operand 2 is the condition code, and operand 3 is the
116
/// flag operand produced by a CMP or TEST instruction.
117
CMOV,
118
119
/// X86 conditional branches. Operand 0 is the chain operand, operand 1
120
/// is the block to branch if condition is true, operand 2 is the
121
/// condition code, and operand 3 is the flag operand produced by a CMP
122
/// or TEST instruction.
123
BRCOND,
124
125
/// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
126
/// operand 1 is the target address.
127
NT_BRIND,
128
129
/// Return with a glue operand. Operand 0 is the chain operand, operand
130
/// 1 is the number of bytes of stack to pop.
131
RET_GLUE,
132
133
/// Return from interrupt. Operand 0 is the number of bytes to pop.
134
IRET,
135
136
/// Repeat fill, corresponds to X86::REP_STOSx.
137
REP_STOS,
138
139
/// Repeat move, corresponds to X86::REP_MOVSx.
140
REP_MOVS,
141
142
/// On Darwin, this node represents the result of the popl
143
/// at function entry, used for PIC code.
144
GlobalBaseReg,
145
146
/// A wrapper node for TargetConstantPool, TargetJumpTable,
147
/// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
148
/// MCSymbol and TargetBlockAddress.
149
Wrapper,
150
151
/// Special wrapper used under X86-64 PIC mode for RIP
152
/// relative displacements.
153
WrapperRIP,
154
155
/// Copies a 64-bit value from an MMX vector to the low word
156
/// of an XMM vector, with the high word zero filled.
157
MOVQ2DQ,
158
159
/// Copies a 64-bit value from the low word of an XMM vector
160
/// to an MMX vector.
161
MOVDQ2Q,
162
163
/// Copies a 32-bit value from the low word of a MMX
164
/// vector to a GPR.
165
MMX_MOVD2W,
166
167
/// Copies a GPR into the low 32-bit word of a MMX vector
168
/// and zero out the high word.
169
MMX_MOVW2D,
170
171
/// Extract an 8-bit value from a vector and zero extend it to
172
/// i32, corresponds to X86::PEXTRB.
173
PEXTRB,
174
175
/// Extract a 16-bit value from a vector and zero extend it to
176
/// i32, corresponds to X86::PEXTRW.
177
PEXTRW,
178
179
/// Insert any element of a 4 x float vector into any element
180
/// of a destination 4 x float vector.
181
INSERTPS,
182
183
/// Insert the lower 8-bits of a 32-bit value to a vector,
184
/// corresponds to X86::PINSRB.
185
PINSRB,
186
187
/// Insert the lower 16-bits of a 32-bit value to a vector,
188
/// corresponds to X86::PINSRW.
189
PINSRW,
190
191
/// Shuffle 16 8-bit values within a vector.
192
PSHUFB,
193
194
/// Compute Sum of Absolute Differences.
195
PSADBW,
196
/// Compute Double Block Packed Sum-Absolute-Differences
197
DBPSADBW,
198
199
/// Bitwise Logical AND NOT of Packed FP values.
200
ANDNP,
201
202
/// Blend where the selector is an immediate.
203
BLENDI,
204
205
/// Dynamic (non-constant condition) vector blend where only the sign bits
206
/// of the condition elements are used. This is used to enforce that the
207
/// condition mask is not valid for generic VSELECT optimizations. This
208
/// is also used to implement the intrinsics.
209
/// Operands are in VSELECT order: MASK, TRUE, FALSE
210
BLENDV,
211
212
/// Combined add and sub on an FP vector.
213
ADDSUB,
214
215
// FP vector ops with rounding mode.
216
FADD_RND,
217
FADDS,
218
FADDS_RND,
219
FSUB_RND,
220
FSUBS,
221
FSUBS_RND,
222
FMUL_RND,
223
FMULS,
224
FMULS_RND,
225
FDIV_RND,
226
FDIVS,
227
FDIVS_RND,
228
FMAX_SAE,
229
FMAXS_SAE,
230
FMIN_SAE,
231
FMINS_SAE,
232
FSQRT_RND,
233
FSQRTS,
234
FSQRTS_RND,
235
236
// FP vector get exponent.
237
FGETEXP,
238
FGETEXP_SAE,
239
FGETEXPS,
240
FGETEXPS_SAE,
241
// Extract Normalized Mantissas.
242
VGETMANT,
243
VGETMANT_SAE,
244
VGETMANTS,
245
VGETMANTS_SAE,
246
// FP Scale.
247
SCALEF,
248
SCALEF_RND,
249
SCALEFS,
250
SCALEFS_RND,
251
252
/// Integer horizontal add/sub.
253
HADD,
254
HSUB,
255
256
/// Floating point horizontal add/sub.
257
FHADD,
258
FHSUB,
259
260
// Detect Conflicts Within a Vector
261
CONFLICT,
262
263
/// Floating point max and min.
264
FMAX,
265
FMIN,
266
267
/// Commutative FMIN and FMAX.
268
FMAXC,
269
FMINC,
270
271
/// Scalar intrinsic floating point max and min.
272
FMAXS,
273
FMINS,
274
275
/// Floating point reciprocal-sqrt and reciprocal approximation.
276
/// Note that these typically require refinement
277
/// in order to obtain suitable precision.
278
FRSQRT,
279
FRCP,
280
281
// AVX-512 reciprocal approximations with a little more precision.
282
RSQRT14,
283
RSQRT14S,
284
RCP14,
285
RCP14S,
286
287
// Thread Local Storage.
288
TLSADDR,
289
290
// Thread Local Storage. A call to get the start address
291
// of the TLS block for the current module.
292
TLSBASEADDR,
293
294
// Thread Local Storage. When calling to an OS provided
295
// thunk at the address from an earlier relocation.
296
TLSCALL,
297
298
// Thread Local Storage. A descriptor containing pointer to
299
// code and to argument to get the TLS offset for the symbol.
300
TLSDESC,
301
302
// Exception Handling helpers.
303
EH_RETURN,
304
305
// SjLj exception handling setjmp.
306
EH_SJLJ_SETJMP,
307
308
// SjLj exception handling longjmp.
309
EH_SJLJ_LONGJMP,
310
311
// SjLj exception handling dispatch.
312
EH_SJLJ_SETUP_DISPATCH,
313
314
/// Tail call return. See X86TargetLowering::LowerCall for
315
/// the list of operands.
316
TC_RETURN,
317
318
// Vector move to low scalar and zero higher vector elements.
319
VZEXT_MOVL,
320
321
// Vector integer truncate.
322
VTRUNC,
323
// Vector integer truncate with unsigned/signed saturation.
324
VTRUNCUS,
325
VTRUNCS,
326
327
// Masked version of the above. Used when less than a 128-bit result is
328
// produced since the mask only applies to the lower elements and can't
329
// be represented by a select.
330
// SRC, PASSTHRU, MASK
331
VMTRUNC,
332
VMTRUNCUS,
333
VMTRUNCS,
334
335
// Vector FP extend.
336
VFPEXT,
337
VFPEXT_SAE,
338
VFPEXTS,
339
VFPEXTS_SAE,
340
341
// Vector FP round.
342
VFPROUND,
343
VFPROUND_RND,
344
VFPROUNDS,
345
VFPROUNDS_RND,
346
347
// Masked version of above. Used for v2f64->v4f32.
348
// SRC, PASSTHRU, MASK
349
VMFPROUND,
350
351
// 128-bit vector logical left / right shift
352
VSHLDQ,
353
VSRLDQ,
354
355
// Vector shift elements
356
VSHL,
357
VSRL,
358
VSRA,
359
360
// Vector variable shift
361
VSHLV,
362
VSRLV,
363
VSRAV,
364
365
// Vector shift elements by immediate
366
VSHLI,
367
VSRLI,
368
VSRAI,
369
370
// Shifts of mask registers.
371
KSHIFTL,
372
KSHIFTR,
373
374
// Bit rotate by immediate
375
VROTLI,
376
VROTRI,
377
378
// Vector packed double/float comparison.
379
CMPP,
380
381
// Vector integer comparisons.
382
PCMPEQ,
383
PCMPGT,
384
385
// v8i16 Horizontal minimum and position.
386
PHMINPOS,
387
388
MULTISHIFT,
389
390
/// Vector comparison generating mask bits for fp and
391
/// integer signed and unsigned data types.
392
CMPM,
393
// Vector mask comparison generating mask bits for FP values.
394
CMPMM,
395
// Vector mask comparison with SAE for FP values.
396
CMPMM_SAE,
397
398
// Arithmetic operations with FLAGS results.
399
ADD,
400
SUB,
401
ADC,
402
SBB,
403
SMUL,
404
UMUL,
405
OR,
406
XOR,
407
AND,
408
409
// Bit field extract.
410
BEXTR,
411
BEXTRI,
412
413
// Zero High Bits Starting with Specified Bit Position.
414
BZHI,
415
416
// Parallel extract and deposit.
417
PDEP,
418
PEXT,
419
420
// X86-specific multiply by immediate.
421
MUL_IMM,
422
423
// Vector sign bit extraction.
424
MOVMSK,
425
426
// Vector bitwise comparisons.
427
PTEST,
428
429
// Vector packed fp sign bitwise comparisons.
430
TESTP,
431
432
// OR/AND test for masks.
433
KORTEST,
434
KTEST,
435
436
// ADD for masks.
437
KADD,
438
439
// Several flavors of instructions with vector shuffle behaviors.
440
// Saturated signed/unsigned packing.
441
PACKSS,
442
PACKUS,
443
// Intra-lane alignr.
444
PALIGNR,
445
// AVX512 inter-lane alignr.
446
VALIGN,
447
PSHUFD,
448
PSHUFHW,
449
PSHUFLW,
450
SHUFP,
451
// VBMI2 Concat & Shift.
452
VSHLD,
453
VSHRD,
454
VSHLDV,
455
VSHRDV,
456
// Shuffle Packed Values at 128-bit granularity.
457
SHUF128,
458
MOVDDUP,
459
MOVSHDUP,
460
MOVSLDUP,
461
MOVLHPS,
462
MOVHLPS,
463
MOVSD,
464
MOVSS,
465
MOVSH,
466
UNPCKL,
467
UNPCKH,
468
VPERMILPV,
469
VPERMILPI,
470
VPERMI,
471
VPERM2X128,
472
473
// Variable Permute (VPERM).
474
// Res = VPERMV MaskV, V0
475
VPERMV,
476
477
// 3-op Variable Permute (VPERMT2).
478
// Res = VPERMV3 V0, MaskV, V1
479
VPERMV3,
480
481
// Bitwise ternary logic.
482
VPTERNLOG,
483
// Fix Up Special Packed Float32/64 values.
484
VFIXUPIMM,
485
VFIXUPIMM_SAE,
486
VFIXUPIMMS,
487
VFIXUPIMMS_SAE,
488
// Range Restriction Calculation For Packed Pairs of Float32/64 values.
489
VRANGE,
490
VRANGE_SAE,
491
VRANGES,
492
VRANGES_SAE,
493
// Reduce - Perform Reduction Transformation on scalar/packed FP.
494
VREDUCE,
495
VREDUCE_SAE,
496
VREDUCES,
497
VREDUCES_SAE,
498
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
499
// Also used by the legacy (V)ROUND intrinsics where we mask out the
500
// scaling part of the immediate.
501
VRNDSCALE,
502
VRNDSCALE_SAE,
503
VRNDSCALES,
504
VRNDSCALES_SAE,
505
// Tests types of packed FP values.
506
VFPCLASS,
507
// Tests types of scalar FP values.
508
VFPCLASSS,
509
510
// Broadcast (splat) scalar or element 0 of a vector. If the operand is
511
// a vector, this node may change the vector length as part of the splat.
512
VBROADCAST,
513
// Broadcast mask to vector.
514
VBROADCASTM,
515
516
/// SSE4A Extraction and Insertion.
517
EXTRQI,
518
INSERTQI,
519
520
// XOP arithmetic/logical shifts.
521
VPSHA,
522
VPSHL,
523
// XOP signed/unsigned integer comparisons.
524
VPCOM,
525
VPCOMU,
526
// XOP packed permute bytes.
527
VPPERM,
528
// XOP two source permutation.
529
VPERMIL2,
530
531
// Vector multiply packed unsigned doubleword integers.
532
PMULUDQ,
533
// Vector multiply packed signed doubleword integers.
534
PMULDQ,
535
// Vector Multiply Packed Unsigned Integers with Round and Scale.
536
MULHRS,
537
538
// Multiply and Add Packed Integers.
539
VPMADDUBSW,
540
VPMADDWD,
541
542
// AVX512IFMA multiply and add.
543
// NOTE: These are different from the instruction and perform
544
// op0 x op1 + op2.
545
VPMADD52L,
546
VPMADD52H,
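// For illustration: the VPMADD52LUQ/VPMADD52HUQ instructions accumulate into
// their destination (dst += lo/hi 52 bits of src1 * src2); these nodes instead
// take the addend explicitly as the third operand.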
547
548
// VNNI
549
VPDPBUSD,
550
VPDPBUSDS,
551
VPDPWSSD,
552
VPDPWSSDS,
553
554
// FMA nodes.
555
// We use the target independent ISD::FMA for the non-inverted case.
556
FNMADD,
557
FMSUB,
558
FNMSUB,
559
FMADDSUB,
560
FMSUBADD,
561
562
// FMA with rounding mode.
563
FMADD_RND,
564
FNMADD_RND,
565
FMSUB_RND,
566
FNMSUB_RND,
567
FMADDSUB_RND,
568
FMSUBADD_RND,
569
570
// AVX512-FP16 complex addition and multiplication.
571
VFMADDC,
572
VFMADDC_RND,
573
VFCMADDC,
574
VFCMADDC_RND,
575
576
VFMULC,
577
VFMULC_RND,
578
VFCMULC,
579
VFCMULC_RND,
580
581
VFMADDCSH,
582
VFMADDCSH_RND,
583
VFCMADDCSH,
584
VFCMADDCSH_RND,
585
586
VFMULCSH,
587
VFMULCSH_RND,
588
VFCMULCSH,
589
VFCMULCSH_RND,
590
591
VPDPBSUD,
592
VPDPBSUDS,
593
VPDPBUUD,
594
VPDPBUUDS,
595
VPDPBSSD,
596
VPDPBSSDS,
597
598
// Compress and expand.
599
COMPRESS,
600
EXPAND,
601
602
// Bits shuffle
603
VPSHUFBITQMB,
604
605
// Convert Unsigned/Integer to Floating-Point Value with rounding mode.
606
SINT_TO_FP_RND,
607
UINT_TO_FP_RND,
608
SCALAR_SINT_TO_FP,
609
SCALAR_UINT_TO_FP,
610
SCALAR_SINT_TO_FP_RND,
611
SCALAR_UINT_TO_FP_RND,
612
613
// Vector float/double to signed/unsigned integer.
614
CVTP2SI,
615
CVTP2UI,
616
CVTP2SI_RND,
617
CVTP2UI_RND,
618
// Scalar float/double to signed/unsigned integer.
619
CVTS2SI,
620
CVTS2UI,
621
CVTS2SI_RND,
622
CVTS2UI_RND,
623
624
// Vector float/double to signed/unsigned integer with truncation.
625
CVTTP2SI,
626
CVTTP2UI,
627
CVTTP2SI_SAE,
628
CVTTP2UI_SAE,
629
// Scalar float/double to signed/unsigned integer with truncation.
630
CVTTS2SI,
631
CVTTS2UI,
632
CVTTS2SI_SAE,
633
CVTTS2UI_SAE,
634
635
// Vector signed/unsigned integer to float/double.
636
CVTSI2P,
637
CVTUI2P,
638
639
// Masked versions of above. Used for v2f64->v4f32.
640
// SRC, PASSTHRU, MASK
641
MCVTP2SI,
642
MCVTP2UI,
643
MCVTTP2SI,
644
MCVTTP2UI,
645
MCVTSI2P,
646
MCVTUI2P,
647
648
// Vector float to bfloat16.
649
// Convert TWO packed single data to one packed BF16 data
650
CVTNE2PS2BF16,
651
// Convert packed single data to packed BF16 data
652
CVTNEPS2BF16,
653
// Masked version of above.
654
// SRC, PASSTHRU, MASK
655
MCVTNEPS2BF16,
656
657
// Dot product of BF16 pairs accumulated into
// packed single precision.
659
DPBF16PS,
660
661
// A stack checking function call. On Windows it's the _chkstk call.
662
DYN_ALLOCA,
663
664
// For allocating variable amounts of stack space when using
// segmented stacks. Checks if the current stacklet has enough space, and
// falls back to heap allocation if not.
667
SEG_ALLOCA,
668
669
// For allocating stack space when using stack clash protector.
670
// Allocation is performed by block, and each block is probed.
671
PROBED_ALLOCA,
672
673
// Memory barriers.
674
MFENCE,
675
676
// Get a random integer and indicate whether it is valid in CF.
677
RDRAND,
678
679
// Get a NIST SP800-90B & C compliant random integer and
680
// indicate whether it is valid in CF.
681
RDSEED,
682
683
// Protection keys
684
// RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
685
// WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
686
// value for ECX.
687
RDPKRU,
688
WRPKRU,
689
690
// SSE42 string comparisons.
691
// These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
// will emit one or two instructions based on which results are used. If
// flags and index/mask are both used, this allows us to use a single
// instruction since we won't have to pick an opcode for flags. Instead we
// can rely on the DAG to CSE everything and decide at isel.
696
PCMPISTR,
697
PCMPESTR,
698
699
// Test if in transactional execution.
700
XTEST,
701
702
// Conversions between float and half-float.
703
CVTPS2PH,
704
CVTPS2PH_SAE,
705
CVTPH2PS,
706
CVTPH2PS_SAE,
707
708
// Masked version of above.
709
// SRC, RND, PASSTHRU, MASK
710
MCVTPS2PH,
711
MCVTPS2PH_SAE,
712
713
// Galois Field Arithmetic Instructions
714
GF2P8AFFINEINVQB,
715
GF2P8AFFINEQB,
716
GF2P8MULB,
717
718
// LWP insert record.
719
LWPINS,
720
721
// User level wait
722
UMWAIT,
723
TPAUSE,
724
725
// Enqueue Stores Instructions
726
ENQCMD,
727
ENQCMDS,
728
729
// For avx512-vp2intersect
730
VP2INTERSECT,
731
732
// User level interrupts - testui
733
TESTUI,
734
735
// Perform an FP80 add after changing precision control in FPCW.
736
FP80_ADD,
737
738
// Conditional compare instructions
739
CCMP,
740
CTEST,
741
742
/// X86 strict FP compare instructions.
743
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
744
STRICT_FCMPS,
745
746
// Vector packed double/float comparison.
747
STRICT_CMPP,
748
749
/// Vector comparison generating mask bits for fp and
750
/// integer signed and unsigned data types.
751
STRICT_CMPM,
752
753
// Vector float/double to signed/unsigned integer with truncation.
754
STRICT_CVTTP2SI,
755
STRICT_CVTTP2UI,
756
757
// Vector FP extend.
758
STRICT_VFPEXT,
759
760
// Vector FP round.
761
STRICT_VFPROUND,
762
763
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
764
// Also used by the legacy (V)ROUND intrinsics where we mask out the
765
// scaling part of the immediate.
766
STRICT_VRNDSCALE,
767
768
// Vector signed/unsigned integer to float/double.
769
STRICT_CVTSI2P,
770
STRICT_CVTUI2P,
771
772
// Strict FMA nodes.
773
STRICT_FNMADD,
774
STRICT_FMSUB,
775
STRICT_FNMSUB,
776
777
// Conversions between float and half-float.
778
STRICT_CVTPS2PH,
779
STRICT_CVTPH2PS,
780
781
// Perform an FP80 add after changing precision control in FPCW.
782
STRICT_FP80_ADD,
783
784
// WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
785
// non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
786
787
// Compare and swap.
788
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
789
LCMPXCHG8_DAG,
790
LCMPXCHG16_DAG,
791
LCMPXCHG16_SAVE_RBX_DAG,
792
793
/// LOCK-prefixed arithmetic read-modify-write instructions.
794
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
795
LADD,
796
LSUB,
797
LOR,
798
LXOR,
799
LAND,
800
LBTS,
801
LBTC,
802
LBTR,
803
LBTS_RM,
804
LBTC_RM,
805
LBTR_RM,
806
807
/// RAO arithmetic instructions.
808
/// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
809
AADD,
810
AOR,
811
AXOR,
812
AAND,
813
814
// Load, scalar_to_vector, and zero extend.
815
VZEXT_LOAD,
816
817
// extract_vector_elt, store.
818
VEXTRACT_STORE,
819
820
// scalar broadcast from memory.
821
VBROADCAST_LOAD,
822
823
// subvector broadcast from memory.
824
SUBV_BROADCAST_LOAD,
825
826
// Store FP control word into i16 memory.
827
FNSTCW16m,
828
829
// Load FP control word from i16 memory.
830
FLDCW16m,
831
832
// Store x87 FPU environment into memory.
833
FNSTENVm,
834
835
// Load x87 FPU environment from memory.
836
FLDENVm,
837
838
/// This instruction implements FP_TO_SINT with the
839
/// integer destination in memory and a FP reg source. This corresponds
840
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
841
/// has two inputs (token chain and address) and two outputs (int value
842
/// and token chain). Memory VT specifies the type to store to.
843
FP_TO_INT_IN_MEM,
844
845
/// This instruction implements SINT_TO_FP with the
846
/// integer source in memory and FP reg result. This corresponds to the
847
/// X86::FILD*m instructions. It has two inputs (token chain and address)
848
/// and two outputs (FP value and token chain). The integer source type is
849
/// specified by the memory VT.
850
FILD,
851
852
/// This instruction implements a fp->int store from FP stack
853
/// slots. This corresponds to the fist instruction. It takes a
854
/// chain operand, value to store, address, and glue. The memory VT
855
/// specifies the type to store as.
856
FIST,
857
858
/// This instruction implements an extending load to FP stack slots.
859
/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
860
/// operand, and ptr to load from. The memory VT specifies the type to
861
/// load from.
862
FLD,
863
864
/// This instruction implements a truncating store from FP stack
865
/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
866
/// chain operand, value to store, address, and glue. The memory VT
867
/// specifies the type to store as.
868
FST,
869
870
/// These instructions grab the address of the next argument
871
/// from a va_list. (reads and modifies the va_list in memory)
872
VAARG_64,
873
VAARG_X32,
874
875
// Vector truncating store with unsigned/signed saturation
876
VTRUNCSTOREUS,
877
VTRUNCSTORES,
878
// Vector truncating masked store with unsigned/signed saturation
879
VMTRUNCSTOREUS,
880
VMTRUNCSTORES,
881
882
// X86 specific gather and scatter
883
MGATHER,
884
MSCATTER,
885
886
// Key locker nodes that produce flags.
887
AESENC128KL,
888
AESDEC128KL,
889
AESENC256KL,
890
AESDEC256KL,
891
AESENCWIDE128KL,
892
AESDECWIDE128KL,
893
AESENCWIDE256KL,
894
AESDECWIDE256KL,
895
896
/// Compare and Add if Condition is Met. Compare the value in operand 2 with
/// the value in memory at operand 1. If the condition in operand 4 is met, add
/// the value of operand 3 to m32 and write the new value to operand 1. Operand
/// 2 is always updated with the original value from operand 1.
900
CMPCCXADD,
901
902
// Save xmm argument registers to the stack, according to %al. An operator
903
// is needed so that this can be expanded with control flow.
904
VASTART_SAVE_XMM_REGS,
905
906
// Conditional load/store instructions
907
CLOAD,
908
CSTORE,
909
910
// WARNING: Do not add anything at the end unless you want the node to
// have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be treated as target memory ops!
913
};
914
} // end namespace X86ISD
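// For illustration, these opcodes are used like any other SelectionDAG opcode
// when the X86 lowering code builds nodes, e.g. (sketch):
//   SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, VT, Mask, Val); // ~Mask & Val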
915
916
namespace X86 {
917
/// The current rounding mode is represented in bits 11:10 of FPSR. These
/// values are the same as the corresponding rounding-mode constants used
/// in glibc.
920
enum RoundingMode {
921
rmToNearest = 0, // FE_TONEAREST
922
rmDownward = 1 << 10, // FE_DOWNWARD
923
rmUpward = 2 << 10, // FE_UPWARD
924
rmTowardZero = 3 << 10, // FE_TOWARDZERO
925
rmMask = 3 << 10 // Bit mask selecting rounding mode
926
};
927
}
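// A minimal usage sketch (assuming CW holds a raw FP control/status word that
// has already been read): the rounding field can be classified with the mask
// above, e.g.
//   bool RoundsTowardZero = (CW & X86::rmMask) == X86::rmTowardZero;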
928
929
/// Define some predicates that are used for node matching.
930
namespace X86 {
931
/// Returns true if Elt is a constant zero or floating point constant +0.0.
932
bool isZeroNode(SDValue Elt);
933
934
/// Returns true if the given offset can
/// fit into the displacement field of the instruction.
936
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
937
bool hasSymbolicDisplacement);
938
939
/// Determines whether the callee is required to pop its
940
/// own arguments. Callee pop is necessary to support tail calls.
941
bool isCalleePop(CallingConv::ID CallingConv,
942
bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
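/// For example, the 32-bit stdcall and fastcall conventions require the callee
/// to pop its own arguments (via "ret imm16"), while the default C convention
/// leaves stack cleanup to the caller.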
943
944
/// If Op is a constant whose elements are all the same constant or
945
/// undefined, return true and return the constant value in \p SplatVal.
946
/// If we have undef bits that don't cover an entire element, we treat these
947
/// as zero if AllowPartialUndefs is set, else we fail and return false.
948
bool isConstantSplat(SDValue Op, APInt &SplatVal,
949
bool AllowPartialUndefs = true);
950
951
/// Check if Op is a load operation that could be folded into some other x86
952
/// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
953
bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
954
bool AssumeSingleUse = false);
955
956
/// Check if Op is a load operation that could be folded into a vector splat
957
/// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
958
bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
959
const X86Subtarget &Subtarget,
960
bool AssumeSingleUse = false);
961
962
/// Check if Op is a value that could be used to fold a store into some
963
/// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
964
bool mayFoldIntoStore(SDValue Op);
965
966
/// Check if Op is an operation that could be folded into a zero extend x86
967
/// instruction.
968
bool mayFoldIntoZeroExtend(SDValue Op);
969
970
/// True if the target supports the extended frame for async Swift
971
/// functions.
972
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
973
const MachineFunction &MF);
974
} // end namespace X86
975
976
//===--------------------------------------------------------------------===//
977
// X86 Implementation of the TargetLowering interface
978
class X86TargetLowering final : public TargetLowering {
979
public:
980
explicit X86TargetLowering(const X86TargetMachine &TM,
981
const X86Subtarget &STI);
982
983
unsigned getJumpTableEncoding() const override;
984
bool useSoftFloat() const override;
985
986
void markLibCallAttributes(MachineFunction *MF, unsigned CC,
987
ArgListTy &Args) const override;
988
989
MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
990
return MVT::i8;
991
}
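// x86 variable shift counts are taken from the 8-bit CL register (e.g.
// "shl %cl, %eax"), which is why the shift-amount type is i8.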
992
993
const MCExpr *
994
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
995
const MachineBasicBlock *MBB, unsigned uid,
996
MCContext &Ctx) const override;
997
998
/// Returns relocation base for the given PIC jumptable.
999
SDValue getPICJumpTableRelocBase(SDValue Table,
1000
SelectionDAG &DAG) const override;
1001
const MCExpr *
1002
getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
1003
unsigned JTI, MCContext &Ctx) const override;
1004
1005
/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
1009
uint64_t getByValTypeAlignment(Type *Ty,
1010
const DataLayout &DL) const override;
1011
1012
EVT getOptimalMemOpType(const MemOp &Op,
1013
const AttributeList &FuncAttributes) const override;
1014
1015
/// Returns true if it's safe to use load / store of the
1016
/// specified type to expand memcpy / memset inline. This is mostly true
1017
/// for all types except for some special cases. For example, on X86
1018
/// targets without SSE2 f64 load / store are done with fldl / fstpl which
1019
/// also does type conversion. Note the specified type doesn't have to be
1020
/// legal as the hook is used before type legalization.
1021
bool isSafeMemOpType(MVT VT) const override;
1022
1023
bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1024
1025
/// Returns true if the target allows unaligned memory accesses of the
1026
/// specified type. Returns whether it is "fast" in the last argument.
1027
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1028
MachineMemOperand::Flags Flags,
1029
unsigned *Fast) const override;
1030
1031
/// This function returns true if the memory access is aligned or if the
1032
/// target allows this specific unaligned memory access. If the access is
1033
/// allowed, the optional final parameter returns a relative speed of the
1034
/// access (as defined by the target).
1035
bool allowsMemoryAccess(
1036
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1037
Align Alignment,
1038
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1039
unsigned *Fast = nullptr) const override;
1040
1041
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1042
const MachineMemOperand &MMO,
1043
unsigned *Fast) const {
1044
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
1045
MMO.getAlign(), MMO.getFlags(), Fast);
1046
}
1047
1048
/// Provide custom lowering hooks for some operations.
1049
///
1050
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1051
1052
/// Replace the results of node with an illegal result
1053
/// type with new values built out of custom code.
1054
///
1055
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1056
SelectionDAG &DAG) const override;
1057
1058
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1059
1060
bool preferABDSToABSWithNSW(EVT VT) const override;
1061
1062
bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
1063
EVT ExtVT) const override;
1064
1065
bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
1066
EVT VT) const override;
1067
1068
/// Return true if the target has native support for
1069
/// the specified value type and it is 'desirable' to use the type for the
1070
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1071
/// instruction encodings are longer and some i16 instructions are slow.
1072
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1073
1074
/// Return true if the target has native support for the
1075
/// specified value type and it is 'desirable' to use the type. e.g. On x86
1076
/// i16 is legal, but undesirable since i16 instruction encodings are longer
1077
/// and some i16 instructions are slow.
1078
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1079
1080
/// Return the preferred fold type: Abs if this is a vector, AddAnd if it's an
/// integer, None otherwise.
1082
TargetLowering::AndOrSETCCFoldKind
1083
isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
1084
const SDNode *SETCC0,
1085
const SDNode *SETCC1) const override;
1086
1087
/// Return the newly negated expression if the cost is not expensive and
1088
/// set the cost in \p Cost to indicate that if it is cheaper or neutral to
1089
/// do the negation.
1090
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1091
bool LegalOperations, bool ForCodeSize,
1092
NegatibleCost &Cost,
1093
unsigned Depth) const override;
1094
1095
MachineBasicBlock *
1096
EmitInstrWithCustomInserter(MachineInstr &MI,
1097
MachineBasicBlock *MBB) const override;
1098
1099
/// This method returns the name of a target specific DAG node.
1100
const char *getTargetNodeName(unsigned Opcode) const override;
1101
1102
/// Do not merge vector stores after legalization because that may conflict
1103
/// with x86-specific store splitting optimizations.
1104
bool mergeStoresAfterLegalization(EVT MemVT) const override {
1105
return !MemVT.isVector();
1106
}
1107
1108
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1109
const MachineFunction &MF) const override;
1110
1111
bool isCheapToSpeculateCttz(Type *Ty) const override;
1112
1113
bool isCheapToSpeculateCtlz(Type *Ty) const override;
1114
1115
bool isCtlzFast() const override;
1116
1117
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1118
// If the pair to store is a mixture of float and int values, we will
1119
// save two bitwise instructions and one float-to-int instruction and
1120
// increase one store instruction. There is potentially a more
1121
// significant benefit because it avoids the float->int domain switch
1122
// for the input value. So it is more likely a win.
1123
if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1124
(LTy.isInteger() && HTy.isFloatingPoint()))
1125
return true;
1126
// If the pair only contains int values, we will save two bitwise
1127
// instructions and increase one store instruction (costing one more
1128
// store buffer). Since the benefit is less clear, we leave
// such pairs out until we have a test case to prove it is a win.
1130
return false;
1131
}
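// For example (a sketch): storing a {float, int} pair as one merged i64 would
// need a float->GPR move (movd) plus shift/or to combine the halves, whereas
// two separate scalar stores avoid that domain crossing entirely.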
1132
1133
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1134
1135
bool hasAndNotCompare(SDValue Y) const override;
1136
1137
bool hasAndNot(SDValue Y) const override;
1138
1139
bool hasBitTest(SDValue X, SDValue Y) const override;
1140
1141
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1142
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1143
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1144
SelectionDAG &DAG) const override;
1145
1146
unsigned preferedOpcodeForCmpEqPiecesOfOperand(
1147
EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
1148
const APInt &ShiftOrRotateAmt,
1149
const std::optional<APInt> &AndMask) const override;
1150
1151
bool preferScalarizeSplat(SDNode *N) const override;
1152
1153
CondMergingParams
1154
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1155
const Value *Rhs) const override;
1156
1157
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1158
CombineLevel Level) const override;
1159
1160
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1161
1162
bool
1163
shouldTransformSignedTruncationCheck(EVT XVT,
1164
unsigned KeptBits) const override {
1165
// For vectors, we don't have a preference.
1166
if (XVT.isVector())
1167
return false;
1168
1169
auto VTIsOk = [](EVT VT) -> bool {
1170
return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1171
VT == MVT::i64;
1172
};
1173
1174
// We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1175
// XVT will be larger than KeptBitsVT.
1176
MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1177
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1178
}
1179
1180
ShiftLegalizationStrategy
1181
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1182
unsigned ExpansionFactor) const override;
1183
1184
bool shouldSplatInsEltVarIndex(EVT VT) const override;
1185
1186
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1187
// Converting to sat variants holds little benefit on X86 as we will just
1188
// need to saturate the value back using fp arithmetic.
1189
return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1190
}
1191
1192
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1193
return VT.isScalarInteger();
1194
}
1195
1196
/// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1197
MVT hasFastEqualityCompare(unsigned NumBits) const override;
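/// For illustration (a sketch of the kind of sequence this enables, not code
/// from this file): a 16-byte equality compare can be lowered to
///   movdqu (%rdi), %xmm0
///   pcmpeqb (%rsi), %xmm0
///   pmovmskb %xmm0, %eax
///   cmp $0xffff, %eax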
1198
1199
/// Return the value type to use for ISD::SETCC.
1200
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1201
EVT VT) const override;
1202
1203
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1204
const APInt &DemandedElts,
1205
TargetLoweringOpt &TLO) const override;
1206
1207
/// Determine which of the bits specified in Mask are known to be either
1208
/// zero or one and return them in the KnownZero/KnownOne bitsets.
1209
void computeKnownBitsForTargetNode(const SDValue Op,
1210
KnownBits &Known,
1211
const APInt &DemandedElts,
1212
const SelectionDAG &DAG,
1213
unsigned Depth = 0) const override;
1214
1215
/// Determine the number of bits in the operation that are sign bits.
1216
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1217
const APInt &DemandedElts,
1218
const SelectionDAG &DAG,
1219
unsigned Depth) const override;
1220
1221
bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1222
const APInt &DemandedElts,
1223
APInt &KnownUndef,
1224
APInt &KnownZero,
1225
TargetLoweringOpt &TLO,
1226
unsigned Depth) const override;
1227
1228
bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1229
const APInt &DemandedElts,
1230
unsigned MaskIndex,
1231
TargetLoweringOpt &TLO,
1232
unsigned Depth) const;
1233
1234
bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1235
const APInt &DemandedBits,
1236
const APInt &DemandedElts,
1237
KnownBits &Known,
1238
TargetLoweringOpt &TLO,
1239
unsigned Depth) const override;
1240
1241
SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1242
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1243
SelectionDAG &DAG, unsigned Depth) const override;
1244
1245
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1246
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1247
bool PoisonOnly, unsigned Depth) const override;
1248
1249
bool canCreateUndefOrPoisonForTargetNode(
1250
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1251
bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1252
1253
bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1254
APInt &UndefElts, const SelectionDAG &DAG,
1255
unsigned Depth) const override;
1256
1257
bool isTargetCanonicalConstantNode(SDValue Op) const override {
1258
// Peek through bitcasts/extracts/inserts to see if we have a broadcast
1259
// vector from memory.
1260
while (Op.getOpcode() == ISD::BITCAST ||
1261
Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1262
(Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1263
Op.getOperand(0).isUndef()))
1264
Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1265
1266
return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1267
TargetLowering::isTargetCanonicalConstantNode(Op);
1268
}
1269
1270
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1271
1272
SDValue unwrapAddress(SDValue N) const override;
1273
1274
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1275
1276
bool ExpandInlineAsm(CallInst *CI) const override;
1277
1278
ConstraintType getConstraintType(StringRef Constraint) const override;
1279
1280
/// Examine constraint string and operand type and determine a weight value.
1281
/// The operand object must already have been set up with the operand type.
1282
ConstraintWeight
1283
getSingleConstraintMatchWeight(AsmOperandInfo &Info,
1284
const char *Constraint) const override;
1285
1286
const char *LowerXConstraint(EVT ConstraintVT) const override;
1287
1288
/// Lower the specified operand into the Ops vector. If it is invalid, don't
1289
/// add anything to Ops. If hasMemory is true it means one of the asm
1290
/// constraints of the inline asm instruction being processed is 'm'.
1291
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1292
std::vector<SDValue> &Ops,
1293
SelectionDAG &DAG) const override;
1294
1295
InlineAsm::ConstraintCode
1296
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1297
if (ConstraintCode == "v")
1298
return InlineAsm::ConstraintCode::v;
1299
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1300
}
1301
1302
/// Handle Lowering flag assembly outputs.
1303
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1304
const SDLoc &DL,
1305
const AsmOperandInfo &Constraint,
1306
SelectionDAG &DAG) const override;
1307
1308
/// Given a physical register constraint
1309
/// (e.g. {edx}), return the register number and the register class for the
1310
/// register. This should only be used for C_Register constraints. On
1311
/// error, this returns a register number of 0.
1312
std::pair<unsigned, const TargetRegisterClass *>
1313
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1314
StringRef Constraint, MVT VT) const override;
1315
1316
/// Return true if the addressing mode represented
1317
/// by AM is legal for this target, for a load/store of the specified type.
1318
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1319
Type *Ty, unsigned AS,
1320
Instruction *I = nullptr) const override;
1321
1322
bool addressingModeSupportsTLS(const GlobalValue &GV) const override;
1323
1324
/// Return true if the specified immediate is legal
1325
/// icmp immediate, that is the target has icmp instructions which can
1326
/// compare a register against the immediate without having to materialize
1327
/// the immediate into a register.
1328
bool isLegalICmpImmediate(int64_t Imm) const override;
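/// For example, "cmp $0x12345678, %rax" is directly encodable (32-bit
/// immediates are sign-extended), but a full 64-bit immediate has no cmp
/// encoding and would first have to be materialized with movabs.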
1329
1330
/// Return true if the specified immediate is legal
1331
/// add immediate, that is the target has add instructions which can
1332
/// add a register and the immediate without having to materialize
1333
/// the immediate into a register.
1334
bool isLegalAddImmediate(int64_t Imm) const override;
1335
1336
bool isLegalStoreImmediate(int64_t Imm) const override;
1337
1338
/// This is used to enable splatted operand transforms for vector shifts
1339
/// and vector funnel shifts.
1340
bool isVectorShiftByScalarCheap(Type *Ty) const override;
1341
1342
/// Add x86-specific opcodes to the default list.
1343
bool isBinOp(unsigned Opcode) const override;
1344
1345
/// Returns true if the opcode is a commutative binary operation.
1346
bool isCommutativeBinOp(unsigned Opcode) const override;
1347
1348
/// Return true if it's free to truncate a value of
1349
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1350
/// register EAX to i16 by referencing its sub-register AX.
1351
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1352
bool isTruncateFree(EVT VT1, EVT VT2) const override;
1353
1354
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1355
1356
/// Return true if any actual instruction that defines a
1357
/// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1358
/// register. This does not necessarily include registers defined in
1359
/// unknown ways, such as incoming arguments, or copies from unknown
1360
/// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1361
/// does not necessarily apply to truncate instructions. e.g. on x86-64,
1362
/// all instructions that define 32-bit values implicit zero-extend the
1363
/// result out to 64 bits.
1364
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1365
bool isZExtFree(EVT VT1, EVT VT2) const override;
1366
bool isZExtFree(SDValue Val, EVT VT2) const override;
1367
1368
bool shouldSinkOperands(Instruction *I,
1369
SmallVectorImpl<Use *> &Ops) const override;
1370
bool shouldConvertPhiType(Type *From, Type *To) const override;
1371
1372
/// Return true if folding a vector load into ExtVal (a sign, zero, or any
1373
/// extend node) is profitable.
1374
bool isVectorLoadExtDesirable(SDValue) const override;
1375
1376
/// Return true if an FMA operation is faster than a pair of fmul and fadd
1377
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
1378
/// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1379
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1380
EVT VT) const override;
1381
1382
/// Return true if it's profitable to narrow operations of type SrcVT to
1383
/// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1384
/// from i32 to i16.
1385
bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;
1386
1387
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1388
EVT VT) const override;
1389
1390
/// Given an intrinsic, checks if on the target the intrinsic will need to map
1391
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1392
/// true and stores the intrinsic information into the IntrinsicInfo that was
1393
/// passed to the function.
1394
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1395
MachineFunction &MF,
1396
unsigned Intrinsic) const override;
1397
1398
/// Returns true if the target can instruction select the
1399
/// specified FP immediate natively. If false, the legalizer will
1400
/// materialize the FP immediate as a load from a constant pool.
1401
bool isFPImmLegal(const APFloat &Imm, EVT VT,
1402
bool ForCodeSize) const override;
1403
1404
/// Targets can use this to indicate that they only support *some*
1405
/// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1406
/// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1407
/// be legal.
1408
bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1409
1410
/// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1411
/// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1412
/// constant pool entry.
1413
bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1414
1415
/// Returns true if lowering to a jump table is allowed.
1416
bool areJTsAllowed(const Function *Fn) const override;
1417
1418
MVT getPreferredSwitchConditionType(LLVMContext &Context,
1419
EVT ConditionVT) const override;
1420
1421
/// If true, then instruction selection should
1422
/// seek to shrink the FP constant of the specified type to a smaller type
1423
/// in order to save space and/or reduce runtime.
1424
bool ShouldShrinkFPConstant(EVT VT) const override;
1425
1426
/// Return true if we believe it is correct and profitable to reduce the
1427
/// load node to a smaller type.
1428
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1429
EVT NewVT) const override;
1430
1431
/// Return true if the specified scalar FP type is computed in an SSE
1432
/// register, not on the X87 floating point stack.
1433
bool isScalarFPTypeInSSEReg(EVT VT) const;
1434
1435
/// Returns true if it is beneficial to convert a load of a constant
1436
/// to just the constant itself.
1437
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1438
Type *Ty) const override;
1439
1440
bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1441
1442
bool convertSelectOfConstantsToMath(EVT VT) const override;
1443
1444
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1445
SDValue C) const override;
1446
1447
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1448
/// with this index.
1449
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1450
unsigned Index) const override;
1451
1452
/// Scalar ops always have equal or better analysis/performance/power than
1453
/// the vector equivalent, so this always makes sense if the scalar op is
1454
/// supported.
1455
bool shouldScalarizeBinop(SDValue) const override;
1456
1457
/// Extract of a scalar FP value from index 0 of a vector is free.
1458
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1459
EVT EltVT = VT.getScalarType();
1460
return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1461
}
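// e.g. extracting element 0 of a v4f32 or v2f64 is free because the scalar
// already lives in the low lane of the XMM register.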
1462
1463
/// Overflow nodes should get combined/lowered to optimal instructions
1464
/// (they should allow eliminating explicit compares by getting flags from
1465
/// math ops).
1466
bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1467
bool MathUsed) const override;
1468
1469
bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1470
unsigned AddrSpace) const override {
1471
// If we can replace more than 2 scalar stores, there will be a reduction
1472
// in instructions even after we add a vector constant load.
1473
return IsZero || NumElem > 2;
1474
}
1475
1476
bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1477
const SelectionDAG &DAG,
1478
const MachineMemOperand &MMO) const override;
1479
1480
Register getRegisterByName(const char* RegName, LLT VT,
1481
const MachineFunction &MF) const override;
1482
1483
/// If a physical register, this returns the register that receives the
1484
/// exception address on entry to an EH pad.
1485
Register
1486
getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1487
1488
/// If a physical register, this returns the register that receives the
1489
/// exception typeid on entry to a landing pad.
1490
Register
1491
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1492
1493
bool needsFixedCatchObjects() const override;
1494
1495
/// This method returns a target specific FastISel object,
1496
/// or null if the target does not support "fast" ISel.
1497
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1498
const TargetLibraryInfo *libInfo) const override;
1499
1500
/// If the target has a standard location for the stack protector cookie,
1501
/// returns the address of that location. Otherwise, returns nullptr.
1502
Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1503
1504
bool useLoadStackGuardNode() const override;
1505
bool useStackGuardXorFP() const override;
1506
void insertSSPDeclarations(Module &M) const override;
1507
Value *getSDagStackGuard(const Module &M) const override;
1508
Function *getSSPStackGuardCheck(const Module &M) const override;
1509
SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1510
const SDLoc &DL) const override;
1511
1512
1513
/// Return true if the target stores SafeStack pointer at a fixed offset in
1514
/// some non-standard address space, and populates the address space and
1515
/// offset as appropriate.
1516
Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1517
1518
std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1519
SDValue Chain, SDValue Pointer,
1520
MachinePointerInfo PtrInfo,
1521
Align Alignment,
1522
SelectionDAG &DAG) const;
1523
1524
/// Customize the preferred legalization strategy for certain types.
1525
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1526
1527
bool softPromoteHalfType() const override { return true; }
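// In short (a sketch of the legalization strategy this selects): f16 values
// are kept as i16 for storage/moves and individually promoted to f32 to do
// arithmetic, then truncated back to f16.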
1528
1529
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1530
EVT VT) const override;
1531
1532
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1533
CallingConv::ID CC,
1534
EVT VT) const override;
1535
1536
unsigned getVectorTypeBreakdownForCallingConv(
1537
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1538
unsigned &NumIntermediates, MVT &RegisterVT) const override;
1539
1540
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1541
1542
bool supportSwiftError() const override;
1543
1544
bool supportKCFIBundles() const override { return true; }
1545
1546
MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1547
MachineBasicBlock::instr_iterator &MBBI,
1548
const TargetInstrInfo *TII) const override;
1549
1550
bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1551
bool hasInlineStackProbe(const MachineFunction &MF) const override;
1552
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1553
1554
unsigned getStackProbeSize(const MachineFunction &MF) const;
1555
1556
bool hasVectorBlend() const override { return true; }
1557
1558
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1559
1560
bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1561
unsigned OpNo) const override;
1562
1563
SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1564
MachineMemOperand *MMO, SDValue &NewLoad,
1565
SDValue Ptr, SDValue PassThru,
1566
SDValue Mask) const override;
1567
SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1568
MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
1569
SDValue Mask) const override;
1570
1571
/// Lower interleaved load(s) into target specific
1572
/// instructions/intrinsics.
1573
bool lowerInterleavedLoad(LoadInst *LI,
1574
ArrayRef<ShuffleVectorInst *> Shuffles,
1575
ArrayRef<unsigned> Indices,
1576
unsigned Factor) const override;
1577
1578
/// Lower interleaved store(s) into target specific
1579
/// instructions/intrinsics.
1580
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1581
unsigned Factor) const override;
1582
1583
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1584
int JTI, SelectionDAG &DAG) const override;
1585
1586
Align getPrefLoopAlignment(MachineLoop *ML) const override;
1587
1588
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1589
if (VT == MVT::f80)
1590
return EVT::getIntegerVT(Context, 96);
1591
return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1592
}
1593
1594
protected:
1595
std::pair<const TargetRegisterClass *, uint8_t>
1596
findRepresentativeClass(const TargetRegisterInfo *TRI,
1597
MVT VT) const override;
1598
1599
private:
1600
/// Keep a reference to the X86Subtarget around so that we can
1601
/// make the right decision when generating code for different targets.
1602
const X86Subtarget &Subtarget;
1603
1604
/// A list of legal FP immediates.
1605
std::vector<APFloat> LegalFPImmediates;
1606
1607
/// Indicate that this x86 target can instruction
1608
/// select the specified FP immediate natively.
1609
void addLegalFPImmediate(const APFloat& Imm) {
1610
LegalFPImmediates.push_back(Imm);
1611
}
1612
1613
SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1614
CallingConv::ID CallConv, bool isVarArg,
1615
const SmallVectorImpl<ISD::InputArg> &Ins,
1616
const SDLoc &dl, SelectionDAG &DAG,
1617
SmallVectorImpl<SDValue> &InVals,
1618
uint32_t *RegMask) const;
1619
SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1620
const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1621
const SDLoc &dl, SelectionDAG &DAG,
1622
const CCValAssign &VA, MachineFrameInfo &MFI,
1623
unsigned i) const;
1624
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1625
const SDLoc &dl, SelectionDAG &DAG,
1626
const CCValAssign &VA,
1627
ISD::ArgFlagsTy Flags, bool isByval) const;
1628
1629
// Call lowering helpers.
1630
1631
/// Check whether the call is eligible for tail call optimization. Targets
1632
/// that want to do tail call optimization should implement this function.
1633
bool IsEligibleForTailCallOptimization(
1634
TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
1635
SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
1636
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1637
SDValue Chain, bool IsTailCall,
1638
bool Is64Bit, int FPDiff,
1639
const SDLoc &dl) const;
1640
1641
unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1642
SelectionDAG &DAG) const;
1643
1644
unsigned getAddressSpace() const;
1645
1646
SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1647
SDValue &Chain) const;
1648
SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1649
1650
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1651
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1652
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1653
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1654
1655
unsigned getGlobalWrapperKind(const GlobalValue *GV,
1656
const unsigned char OpFlags) const;
1657
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1658
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1659
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1660
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1661
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1662
1663
/// Creates target global address or external symbol nodes for calls or
1664
/// other uses.
1665
SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1666
bool ForCall) const;
1667
1668
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
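    // Split CSR is restricted to CXX_FAST_TLS functions that cannot unwind,
    // presumably because the entry/exit copies created by insertCopiesSplitCSR
    // take the place of the usual prologue/epilogue saves.
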
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
    ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;
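    // (Presumably: true when the access is wider than the widest natively
    // supported atomic, so it must be lowered via CMPXCHG8B/CMPXCHG16B.)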

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to lower a cascaded pair of CMOV pseudo instructions
    /// (the second CMOV consuming the first's result) into branch-based
    /// control flow.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
                                              MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;
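    // For example (illustrative): lowering (setcc i32 %a, %b, slt) would
    // typically return an X86ISD::CMP of the two operands as the flags value
    // and set X86CC to a target constant holding X86::COND_L.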

    bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                             SDValue IntPow2) const override;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

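    // Illustrative math: both estimates are refined with Newton-Raphson steps,
    // roughly x1 = x0 * (1.5 - 0.5 * d * x0 * x0) for rsqrt(d) and
    // x1 = x0 * (2 - d * x0) for 1/d; RefinementSteps selects how many
    // iterations are emitted.
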
    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
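    // For example (illustrative): given x/d + y/d + z/d and a threshold of 3,
    // the reciprocal r = 1.0/d is computed once and the terms become
    // x*r, y*r and z*r.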

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                    SDValue V2) const;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };
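
  // For example (illustrative), a DAG combine can match these nodes with the
  // usual cast machinery:
  //   if (auto *G = dyn_cast<X86MaskedGatherSDNode>(N)) {
  //     SDValue Index = G->getIndex(); // operand #4
  //     SDValue Mask = G->getMask();   // operand #2
  //   }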

  /// Generate unpacklo/unpackhi shuffle mask.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);
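  // For example (illustrative), for MVT::v4i32 this produces
  //   Lo && !Unary --> <0, 4, 1, 5>  (interleave the low halves of two inputs)
  //   Lo &&  Unary --> <0, 0, 1, 1>  (interleave the low half of one input)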

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H