CoCalc -- NeonEmitter.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/utils/TableGen/NeonEmitter.cpp
³⁵²³⁰ views
1
//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This tablegen backend is responsible for emitting arm_neon.h, which includes
10
// a declaration and definition of each function specified by the ARM NEON
11
// compiler interface.  See ARM document DUI0348B.
12
//
13
// Each NEON instruction is implemented in terms of 1 or more functions which
14
// are suffixed with the element type of the input vectors.  Functions may be
15
// implemented in terms of generic vector operations such as +, *, -, etc. or
16
// by calling a __builtin_-prefixed function which will be handled by clang's
17
// CodeGen library.
18
//
19
// Additional validation code can be generated by this file when runHeader() is
20
// called, rather than the normal run() entry point.
21
//
22
// See also the documentation in include/clang/Basic/arm_neon.td.
23
//
24
//===----------------------------------------------------------------------===//
25

26
#include "TableGenBackends.h"
27
#include "llvm/ADT/ArrayRef.h"
28
#include "llvm/ADT/DenseMap.h"
29
#include "llvm/ADT/STLExtras.h"
30
#include "llvm/ADT/SmallVector.h"
31
#include "llvm/ADT/StringExtras.h"
32
#include "llvm/ADT/StringRef.h"
33
#include "llvm/Support/Casting.h"
34
#include "llvm/Support/ErrorHandling.h"
35
#include "llvm/Support/raw_ostream.h"
36
#include "llvm/TableGen/Error.h"
37
#include "llvm/TableGen/Record.h"
38
#include "llvm/TableGen/SetTheory.h"
39
#include <algorithm>
40
#include <cassert>
41
#include <cctype>
42
#include <cstddef>
43
#include <cstdint>
44
#include <deque>
45
#include <map>
46
#include <optional>
47
#include <set>
48
#include <sstream>
49
#include <string>
50
#include <utility>
51
#include <vector>
52

53
using namespace llvm;
54

55
namespace {
56

57
// While globals are generally bad, this one allows us to perform assertions
58
// liberally and somehow still trace them back to the def they indirectly
59
// came from.
60
static Record *CurrentRecord = nullptr;
61
static void assert_with_loc(bool Assertion, const std::string &Str) {
62
  if (!Assertion) {
63
    if (CurrentRecord)
64
      PrintFatalError(CurrentRecord->getLoc(), Str);
65
    else
66
      PrintFatalError(Str);
67
  }
68
}
69

70
/// The instruction class of an intrinsic, which determines the kind of type
/// suffix used for it.
enum ClassKind {
  /// No class assigned.
  ClassNone,
  /// Generic integer instruction, e.g., "i8" suffix.
  ClassI,
  /// Signed/unsigned/poly instruction, e.g., "s8", "u8" or "p8" suffix.
  ClassS,
  /// Width-specific instruction, e.g., "8" suffix.
  ClassW,
  /// Bitcast arguments with an enum argument to specify the type.
  ClassB,
  /// Logical instructions which are op instructions but for which no suffix
  /// must be emitted in our tests.
  ClassL,
  /// Instructions which we do not test since they are not TRUE instructions.
  ClassNoTest
};
82

83
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace NeonTypeFlags {

enum {
  /// Mask selecting the EltType stored in the low bits of a flag word.
  EltTypeMask = 0xf,
  /// Set for unsigned integer element types.
  UnsignedFlag = 0x10,
  /// Set for 128-bit ("quad") vectors.
  QuadFlag = 0x20
};

/// Element type codes. The numeric order matters: Type::getNeonEnum()
/// computes a code by adding an offset to Int8, Poly8 or Float16.
enum EltType {
  Int8,
  Int16,
  Int32,
  Int64,
  Poly8,
  Poly16,
  Poly64,
  Poly128,
  Float16,
  Float32,
  Float64,
  BFloat16
};

} // end namespace NeonTypeFlags
106

107
class NeonEmitter;
108

109
//===----------------------------------------------------------------------===//
110
// TypeSpec
111
//===----------------------------------------------------------------------===//
112

113
/// A TypeSpec is just a simple wrapper around a string, but gets its own type
114
/// for strong typing purposes.
115
///
116
/// A TypeSpec can be used to create a type.
117
class TypeSpec : public std::string {
118
public:
119
  static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
120
    std::vector<TypeSpec> Ret;
121
    TypeSpec Acc;
122
    for (char I : Str.str()) {
123
      if (islower(I)) {
124
        Acc.push_back(I);
125
        Ret.push_back(TypeSpec(Acc));
126
        Acc.clear();
127
      } else {
128
        Acc.push_back(I);
129
      }
130
    }
131
    return Ret;
132
  }
133
};
134

135
//===----------------------------------------------------------------------===//
136
// Type
137
//===----------------------------------------------------------------------===//
138

139
/// A Type. Not much more to say here.
140
class Type {
141
private:
142
  TypeSpec TS;
143

144
  enum TypeKind {
145
    Void,
146
    Float,
147
    SInt,
148
    UInt,
149
    Poly,
150
    BFloat16,
151
  };
152
  TypeKind Kind;
153
  bool Immediate, Constant, Pointer;
154
  // ScalarForMangling and NoManglingQ are really not suited to live here as
155
  // they are not related to the type. But they live in the TypeSpec (not the
156
  // prototype), so this is really the only place to store them.
157
  bool ScalarForMangling, NoManglingQ;
158
  unsigned Bitwidth, ElementBitwidth, NumVectors;
159

160
public:
161
  Type()
162
      : Kind(Void), Immediate(false), Constant(false),
163
        Pointer(false), ScalarForMangling(false), NoManglingQ(false),
164
        Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
165

166
  Type(TypeSpec TS, StringRef CharMods)
167
      : TS(std::move(TS)), Kind(Void), Immediate(false),
168
        Constant(false), Pointer(false), ScalarForMangling(false),
169
        NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
170
    applyModifiers(CharMods);
171
  }
172

173
  /// Returns a type representing "void".
174
  static Type getVoid() { return Type(); }
175

176
  bool operator==(const Type &Other) const { return str() == Other.str(); }
177
  bool operator!=(const Type &Other) const { return !operator==(Other); }
178

179
  //
180
  // Query functions
181
  //
182
  bool isScalarForMangling() const { return ScalarForMangling; }
183
  bool noManglingQ() const { return NoManglingQ; }
184

185
  bool isPointer() const { return Pointer; }
186
  bool isValue() const { return !isVoid() && !isPointer(); }
187
  bool isScalar() const { return isValue() && NumVectors == 0; }
188
  bool isVector() const { return isValue() && NumVectors > 0; }
189
  bool isConstPointer() const { return Constant; }
190
  bool isFloating() const { return Kind == Float; }
191
  bool isInteger() const { return Kind == SInt || Kind == UInt; }
192
  bool isPoly() const { return Kind == Poly; }
193
  bool isSigned() const { return Kind == SInt; }
194
  bool isImmediate() const { return Immediate; }
195
  bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
196
  bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
197
  bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
198
  bool isChar() const { return ElementBitwidth == 8; }
199
  bool isShort() const { return isInteger() && ElementBitwidth == 16; }
200
  bool isInt() const { return isInteger() && ElementBitwidth == 32; }
201
  bool isLong() const { return isInteger() && ElementBitwidth == 64; }
202
  bool isVoid() const { return Kind == Void; }
203
  bool isBFloat16() const { return Kind == BFloat16; }
204
  unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
205
  unsigned getSizeInBits() const { return Bitwidth; }
206
  unsigned getElementSizeInBits() const { return ElementBitwidth; }
207
  unsigned getNumVectors() const { return NumVectors; }
208

209
  //
210
  // Mutator functions
211
  //
212
  void makeUnsigned() {
213
    assert(!isVoid() && "not a potentially signed type");
214
    Kind = UInt;
215
  }
216
  void makeSigned() {
217
    assert(!isVoid() && "not a potentially signed type");
218
    Kind = SInt;
219
  }
220

221
  void makeInteger(unsigned ElemWidth, bool Sign) {
222
    assert(!isVoid() && "converting void to int probably not useful");
223
    Kind = Sign ? SInt : UInt;
224
    Immediate = false;
225
    ElementBitwidth = ElemWidth;
226
  }
227

228
  void makeImmediate(unsigned ElemWidth) {
229
    Kind = SInt;
230
    Immediate = true;
231
    ElementBitwidth = ElemWidth;
232
  }
233

234
  void makeScalar() {
235
    Bitwidth = ElementBitwidth;
236
    NumVectors = 0;
237
  }
238

239
  void makeOneVector() {
240
    assert(isVector());
241
    NumVectors = 1;
242
  }
243

244
  void make32BitElement() {
245
    assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
246
    ElementBitwidth = 32;
247
  }
248

249
  void doubleLanes() {
250
    assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
251
    Bitwidth = 128;
252
  }
253

254
  void halveLanes() {
255
    assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
256
    Bitwidth = 64;
257
  }
258

259
  /// Return the C string representation of a type, which is the typename
260
  /// defined in stdint.h or arm_neon.h.
261
  std::string str() const;
262

263
  /// Return the string representation of a type, which is an encoded
264
  /// string for passing to the BUILTIN() macro in Builtins.def.
265
  std::string builtin_str() const;
266

267
  /// Return the value in NeonTypeFlags for this type.
268
  unsigned getNeonEnum() const;
269

270
  /// Parse a type from a stdint.h or arm_neon.h typedef name,
271
  /// for example uint32x2_t or int64_t.
272
  static Type fromTypedefName(StringRef Name);
273

274
private:
275
  /// Creates the type based on the typespec string in TS.
276
  /// Sets "Quad" to true if the "Q" or "H" modifiers were
277
  /// seen. This is needed by applyModifier as some modifiers
278
  /// only take effect if the type size was changed by "Q" or "H".
279
  void applyTypespec(bool &Quad);
280
  /// Applies prototype modifiers to the type.
281
  void applyModifiers(StringRef Mods);
282
};
283

284
//===----------------------------------------------------------------------===//
285
// Variable
286
//===----------------------------------------------------------------------===//
287

288
/// A variable is a simple class that just has a type and a name.
289
class Variable {
290
  Type T;
291
  std::string N;
292

293
public:
294
  Variable() : T(Type::getVoid()) {}
295
  Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
296

297
  Type getType() const { return T; }
298
  std::string getName() const { return "__" + N; }
299
};
300

301
//===----------------------------------------------------------------------===//
302
// Intrinsic
303
//===----------------------------------------------------------------------===//
304

305
/// The main grunt class. This represents an instantiation of an intrinsic with
306
/// a particular typespec and prototype.
307
class Intrinsic {
308
  /// The Record this intrinsic was created from.
309
  Record *R;
310
  /// The unmangled name.
311
  std::string Name;
312
  /// The input and output typespecs. InTS == OutTS except when
313
  /// CartesianProductWith is non-empty - this is the case for vreinterpret.
314
  TypeSpec OutTS, InTS;
315
  /// The base class kind. Most intrinsics use ClassS, which has full type
316
  /// info for integers (s32/u32). Some use ClassI, which doesn't care about
317
  /// signedness (i32), while some (ClassB) have no type at all, only a width
318
  /// (32).
319
  ClassKind CK;
320
  /// The list of DAGs for the body. May be empty, in which case we should
321
  /// emit a builtin call.
322
  ListInit *Body;
323
  /// The architectural ifdef guard.
324
  std::string ArchGuard;
325
  /// The architectural target() guard.
326
  std::string TargetGuard;
327
  /// Set if the Unavailable bit is 1. This means we don't generate a body,
328
  /// just an "unavailable" attribute on a declaration.
329
  bool IsUnavailable;
330
  /// Is this intrinsic safe for big-endian? or does it need its arguments
331
  /// reversing?
332
  bool BigEndianSafe;
333

334
  /// The types of return value [0] and parameters [1..].
335
  std::vector<Type> Types;
336
  /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
337
  int PolymorphicKeyType;
338
  /// The local variables defined.
339
  std::map<std::string, Variable> Variables;
340
  /// NeededEarly - set if any other intrinsic depends on this intrinsic.
341
  bool NeededEarly;
342
  /// UseMacro - set if we should implement using a macro or unset for a
343
  ///            function.
344
  bool UseMacro;
345
  /// The set of intrinsics that this intrinsic uses/requires.
346
  std::set<Intrinsic *> Dependencies;
347
  /// The "base type", which is Type('d', OutTS). InBaseType is only
348
  /// different if CartesianProductWith is non-empty (for vreinterpret).
349
  Type BaseType, InBaseType;
350
  /// The return variable.
351
  Variable RetVar;
352
  /// A postfix to apply to every variable. Defaults to "".
353
  std::string VariablePostfix;
354

355
  NeonEmitter &Emitter;
356
  std::stringstream OS;
357

358
  bool isBigEndianSafe() const {
359
    if (BigEndianSafe)
360
      return true;
361

362
    for (const auto &T : Types){
363
      if (T.isVector() && T.getNumElements() > 1)
364
        return false;
365
    }
366
    return true;
367
  }
368

369
public:
370
  Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
371
            TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
372
            StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe)
373
      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
374
        ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
375
        BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
376
        UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
377
        Emitter(Emitter) {
378
    // Modify the TypeSpec per-argument to get a concrete Type, and create
379
    // known variables for each.
380
    // Types[0] is the return value.
381
    unsigned Pos = 0;
382
    Types.emplace_back(OutTS, getNextModifiers(Proto, Pos));
383
    StringRef Mods = getNextModifiers(Proto, Pos);
384
    while (!Mods.empty()) {
385
      Types.emplace_back(InTS, Mods);
386
      if (Mods.contains('!'))
387
        PolymorphicKeyType = Types.size() - 1;
388

389
      Mods = getNextModifiers(Proto, Pos);
390
    }
391

392
    for (const auto &Type : Types) {
393
      // If this builtin takes an immediate argument, we need to #define it rather
394
      // than use a standard declaration, so that SemaChecking can range check
395
      // the immediate passed by the user.
396

397
      // Pointer arguments need to use macros to avoid hiding aligned attributes
398
      // from the pointer type.
399

400
      // It is not permitted to pass or return an __fp16 by value, so intrinsics
401
      // taking a scalar float16_t must be implemented as macros.
402
      if (Type.isImmediate() || Type.isPointer() ||
403
          (Type.isScalar() && Type.isHalf()))
404
        UseMacro = true;
405
    }
406
  }
407

408
  /// Get the Record that this intrinsic is based off.
409
  Record *getRecord() const { return R; }
410
  /// Get the set of Intrinsics that this intrinsic calls.
411
  /// this is the set of immediate dependencies, NOT the
412
  /// transitive closure.
413
  const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
414
  /// Get the architectural guard string (#ifdef).
415
  std::string getArchGuard() const { return ArchGuard; }
416
  std::string getTargetGuard() const { return TargetGuard; }
417
  /// Get the non-mangled name.
418
  std::string getName() const { return Name; }
419

420
  /// Return true if the intrinsic takes an immediate operand.
421
  bool hasImmediate() const {
422
    return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
423
  }
424

425
  /// Return the parameter index of the immediate operand.
426
  unsigned getImmediateIdx() const {
427
    for (unsigned Idx = 0; Idx < Types.size(); ++Idx)
428
      if (Types[Idx].isImmediate())
429
        return Idx - 1;
430
    llvm_unreachable("Intrinsic has no immediate");
431
  }
432

433

434
  unsigned getNumParams() const { return Types.size() - 1; }
435
  Type getReturnType() const { return Types[0]; }
436
  Type getParamType(unsigned I) const { return Types[I + 1]; }
437
  Type getBaseType() const { return BaseType; }
438
  Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }
439

440
  /// Return true if the prototype has a scalar argument.
441
  bool protoHasScalar() const;
442

443
  /// Return the index that parameter PIndex will sit at
444
  /// in a generated function call. This is often just PIndex,
445
  /// but may not be as things such as multiple-vector operands
446
  /// and sret parameters need to be taken into account.
447
  unsigned getGeneratedParamIdx(unsigned PIndex) {
448
    unsigned Idx = 0;
449
    if (getReturnType().getNumVectors() > 1)
450
      // Multiple vectors are passed as sret.
451
      ++Idx;
452

453
    for (unsigned I = 0; I < PIndex; ++I)
454
      Idx += std::max(1U, getParamType(I).getNumVectors());
455

456
    return Idx;
457
  }
458

459
  bool hasBody() const { return Body && !Body->getValues().empty(); }
460

461
  void setNeededEarly() { NeededEarly = true; }
462

463
  bool operator<(const Intrinsic &Other) const {
464
    // Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name)
465
    if (ArchGuard != Other.ArchGuard)
466
      return ArchGuard < Other.ArchGuard;
467
    if (TargetGuard != Other.TargetGuard)
468
      return TargetGuard < Other.TargetGuard;
469
    return Name < Other.Name;
470
  }
471

472
  ClassKind getClassKind(bool UseClassBIfScalar = false) {
473
    if (UseClassBIfScalar && !protoHasScalar())
474
      return ClassB;
475
    return CK;
476
  }
477

478
  /// Return the name, mangled with type information.
479
  /// If ForceClassS is true, use ClassS (u32/s32) instead
480
  /// of the intrinsic's own type class.
481
  std::string getMangledName(bool ForceClassS = false) const;
482
  /// Return the type code for a builtin function call.
483
  std::string getInstTypeCode(Type T, ClassKind CK) const;
484
  /// Return the type string for a BUILTIN() macro in Builtins.def.
485
  std::string getBuiltinTypeStr();
486

487
  /// Generate the intrinsic, returning code.
488
  std::string generate();
489
  /// Perform type checking and populate the dependency graph, but
490
  /// don't generate code yet.
491
  void indexBody();
492

493
private:
494
  StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
495

496
  std::string mangleName(std::string Name, ClassKind CK) const;
497

498
  void initVariables();
499
  std::string replaceParamsIn(std::string S);
500

501
  void emitBodyAsBuiltinCall();
502

503
  void generateImpl(bool ReverseArguments,
504
                    StringRef NamePrefix, StringRef CallPrefix);
505
  void emitReturn();
506
  void emitBody(StringRef CallPrefix);
507
  void emitShadowedArgs();
508
  void emitArgumentReversal();
509
  void emitReturnVarDecl();
510
  void emitReturnReversal();
511
  void emitReverseVariable(Variable &Dest, Variable &Src);
512
  void emitNewLine();
513
  void emitClosingBrace();
514
  void emitOpeningBrace();
515
  void emitPrototype(StringRef NamePrefix);
516

517
  class DagEmitter {
518
    Intrinsic &Intr;
519
    StringRef CallPrefix;
520

521
  public:
522
    DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
523
      Intr(Intr), CallPrefix(CallPrefix) {
524
    }
525
    std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
526
    std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
527
    std::pair<Type, std::string> emitDagSplat(DagInit *DI);
528
    std::pair<Type, std::string> emitDagDup(DagInit *DI);
529
    std::pair<Type, std::string> emitDagDupTyped(DagInit *DI);
530
    std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
531
    std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
532
    std::pair<Type, std::string> emitDagCall(DagInit *DI,
533
                                             bool MatchMangledName);
534
    std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
535
    std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
536
    std::pair<Type, std::string> emitDagOp(DagInit *DI);
537
    std::pair<Type, std::string> emitDag(DagInit *DI);
538
  };
539
};
540

541
//===----------------------------------------------------------------------===//
542
// NeonEmitter
543
//===----------------------------------------------------------------------===//
544

545
class NeonEmitter {
546
  RecordKeeper &Records;
547
  DenseMap<Record *, ClassKind> ClassMap;
548
  std::map<std::string, std::deque<Intrinsic>> IntrinsicMap;
549
  unsigned UniqueNumber;
550

551
  void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
552
  void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
553
  void genStreamingSVECompatibleList(raw_ostream &OS,
554
                                     SmallVectorImpl<Intrinsic *> &Defs);
555
  void genOverloadTypeCheckCode(raw_ostream &OS,
556
                                SmallVectorImpl<Intrinsic *> &Defs);
557
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
558
                                  SmallVectorImpl<Intrinsic *> &Defs);
559

560
public:
561
  /// Called by Intrinsic - this attempts to get an intrinsic that takes
562
  /// the given types as arguments.
563
  Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
564
                          std::optional<std::string> MangledName);
565

566
  /// Called by Intrinsic - returns a globally-unique number.
567
  unsigned getUniqueNumber() { return UniqueNumber++; }
568

569
  NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
570
    Record *SI = R.getClass("SInst");
571
    Record *II = R.getClass("IInst");
572
    Record *WI = R.getClass("WInst");
573
    Record *SOpI = R.getClass("SOpInst");
574
    Record *IOpI = R.getClass("IOpInst");
575
    Record *WOpI = R.getClass("WOpInst");
576
    Record *LOpI = R.getClass("LOpInst");
577
    Record *NoTestOpI = R.getClass("NoTestOpInst");
578

579
    ClassMap[SI] = ClassS;
580
    ClassMap[II] = ClassI;
581
    ClassMap[WI] = ClassW;
582
    ClassMap[SOpI] = ClassS;
583
    ClassMap[IOpI] = ClassI;
584
    ClassMap[WOpI] = ClassW;
585
    ClassMap[LOpI] = ClassL;
586
    ClassMap[NoTestOpI] = ClassNoTest;
587
  }
588

589
  // Emit arm_neon.h.inc
590
  void run(raw_ostream &o);
591

592
  // Emit arm_fp16.h.inc
593
  void runFP16(raw_ostream &o);
594

595
  // Emit arm_bf16.h.inc
596
  void runBF16(raw_ostream &o);
597

598
  void runVectorTypes(raw_ostream &o);
599

600
  // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
601
  // arm_bf16.h
602
  void runHeader(raw_ostream &o);
603
};
604

605
} // end anonymous namespace
606

607
//===----------------------------------------------------------------------===//
608
// Type implementation
609
//===----------------------------------------------------------------------===//
610

611
std::string Type::str() const {
612
  if (isVoid())
613
    return "void";
614
  std::string S;
615

616
  if (isInteger() && !isSigned())
617
    S += "u";
618

619
  if (isPoly())
620
    S += "poly";
621
  else if (isFloating())
622
    S += "float";
623
  else if (isBFloat16())
624
    S += "bfloat";
625
  else
626
    S += "int";
627

628
  S += utostr(ElementBitwidth);
629
  if (isVector())
630
    S += "x" + utostr(getNumElements());
631
  if (NumVectors > 1)
632
    S += "x" + utostr(NumVectors);
633
  S += "_t";
634

635
  if (Constant)
636
    S += " const";
637
  if (Pointer)
638
    S += " *";
639

640
  return S;
641
}
642

643
std::string Type::builtin_str() const {
644
  std::string S;
645
  if (isVoid())
646
    return "v";
647

648
  if (isPointer()) {
649
    // All pointers are void pointers.
650
    S = "v";
651
    if (isConstPointer())
652
      S += "C";
653
    S += "*";
654
    return S;
655
  } else if (isInteger())
656
    switch (ElementBitwidth) {
657
    case 8: S += "c"; break;
658
    case 16: S += "s"; break;
659
    case 32: S += "i"; break;
660
    case 64: S += "Wi"; break;
661
    case 128: S += "LLLi"; break;
662
    default: llvm_unreachable("Unhandled case!");
663
    }
664
  else if (isBFloat16()) {
665
    assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
666
    S += "y";
667
  } else
668
    switch (ElementBitwidth) {
669
    case 16: S += "h"; break;
670
    case 32: S += "f"; break;
671
    case 64: S += "d"; break;
672
    default: llvm_unreachable("Unhandled case!");
673
    }
674

675
  // FIXME: NECESSARY???????????????????????????????????????????????????????????????????????
676
  if (isChar() && !isPointer() && isSigned())
677
    // Make chars explicitly signed.
678
    S = "S" + S;
679
  else if (isInteger() && !isSigned())
680
    S = "U" + S;
681

682
  // Constant indices are "int", but have the "constant expression" modifier.
683
  if (isImmediate()) {
684
    assert(isInteger() && isSigned());
685
    S = "I" + S;
686
  }
687

688
  if (isScalar())
689
    return S;
690

691
  std::string Ret;
692
  for (unsigned I = 0; I < NumVectors; ++I)
693
    Ret += "V" + utostr(getNumElements()) + S;
694

695
  return Ret;
696
}
697

698
unsigned Type::getNeonEnum() const {
699
  unsigned Addend;
700
  switch (ElementBitwidth) {
701
  case 8: Addend = 0; break;
702
  case 16: Addend = 1; break;
703
  case 32: Addend = 2; break;
704
  case 64: Addend = 3; break;
705
  case 128: Addend = 4; break;
706
  default: llvm_unreachable("Unhandled element bitwidth!");
707
  }
708

709
  unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
710
  if (isPoly()) {
711
    // Adjustment needed because Poly32 doesn't exist.
712
    if (Addend >= 2)
713
      --Addend;
714
    Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
715
  }
716
  if (isFloating()) {
717
    assert(Addend != 0 && "Float8 doesn't exist!");
718
    Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
719
  }
720

721
  if (isBFloat16()) {
722
    assert(Addend == 1 && "BFloat16 is only 16 bit");
723
    Base = (unsigned)NeonTypeFlags::BFloat16;
724
  }
725

726
  if (Bitwidth == 128)
727
    Base |= (unsigned)NeonTypeFlags::QuadFlag;
728
  if (isInteger() && !isSigned())
729
    Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
730

731
  return Base;
732
}
733

734
/// Parse a type from a stdint.h or arm_neon.h typedef name, for example
/// uint32x2_t or int64_t.
///
/// The accepted shape is: an optional "u", one of "float", "poly", "bfloat"
/// or "int", the element width, an optional "x<lanes>", an optional
/// "x<vectors>", and finally "_t". Anything else is reported as a fatal
/// error through assert_with_loc, so malformed names are diagnosed in
/// release builds as well instead of yielding a half-initialised Type.
///
/// \param Name the typedef name to parse.
/// \returns the parsed Type; it is scalar (NumVectors == 0) when the name
///          carries no lane count.
Type Type::fromTypedefName(StringRef Name) {
  Type T;
  T.Kind = SInt;

  if (Name.consume_front("u"))
    T.Kind = UInt;

  if (Name.consume_front("float")) {
    T.Kind = Float;
  } else if (Name.consume_front("poly")) {
    T.Kind = Poly;
  } else if (Name.consume_front("bfloat")) {
    T.Kind = BFloat16;
  } else {
    const bool IsInt = Name.consume_front("int");
    assert_with_loc(IsInt, "Malformed typedef: unknown element kind!");
  }

  // StringRef::consumeInteger strips the leading digits from Name and
  // returns true on failure (no digits, or the value does not fit).
  const bool BadWidth = Name.consumeInteger(10, T.ElementBitwidth);
  assert_with_loc(!BadWidth, "Malformed typedef: missing element width!");

  T.Bitwidth = T.ElementBitwidth;
  T.NumVectors = 1;

  if (Name.consume_front("x")) {
    unsigned NumLanes = 0;
    const bool BadLanes = Name.consumeInteger(10, NumLanes);
    assert_with_loc(!BadLanes, "Malformed typedef: missing lane count!");
    T.Bitwidth = T.ElementBitwidth * NumLanes;
  } else {
    // Was scalar.
    T.NumVectors = 0;
  }

  if (Name.consume_front("x")) {
    const bool BadCount = Name.consumeInteger(10, T.NumVectors);
    assert_with_loc(!BadCount, "Malformed typedef: missing vector count!");
  }

  assert_with_loc(Name.starts_with("_t"), "Malformed typedef!");
  return T;
}
790

791
void Type::applyTypespec(bool &Quad) {
792
  std::string S = TS;
793
  ScalarForMangling = false;
794
  Kind = SInt;
795
  ElementBitwidth = ~0U;
796
  NumVectors = 1;
797

798
  for (char I : S) {
799
    switch (I) {
800
    case 'S':
801
      ScalarForMangling = true;
802
      break;
803
    case 'H':
804
      NoManglingQ = true;
805
      Quad = true;
806
      break;
807
    case 'Q':
808
      Quad = true;
809
      break;
810
    case 'P':
811
      Kind = Poly;
812
      break;
813
    case 'U':
814
      Kind = UInt;
815
      break;
816
    case 'c':
817
      ElementBitwidth = 8;
818
      break;
819
    case 'h':
820
      Kind = Float;
821
      [[fallthrough]];
822
    case 's':
823
      ElementBitwidth = 16;
824
      break;
825
    case 'f':
826
      Kind = Float;
827
      [[fallthrough]];
828
    case 'i':
829
      ElementBitwidth = 32;
830
      break;
831
    case 'd':
832
      Kind = Float;
833
      [[fallthrough]];
834
    case 'l':
835
      ElementBitwidth = 64;
836
      break;
837
    case 'k':
838
      ElementBitwidth = 128;
839
      // Poly doesn't have a 128x1 type.
840
      if (isPoly())
841
        NumVectors = 0;
842
      break;
843
    case 'b':
844
      Kind = BFloat16;
845
      ElementBitwidth = 16;
846
      break;
847
    default:
848
      llvm_unreachable("Unhandled type code!");
849
    }
850
  }
851
  assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
852

853
  Bitwidth = Quad ? 128 : 64;
854
}
855

856
/// Builds the type from TS via applyTypespec, then applies each prototype
/// modifier character in \p Mods in order.
void Type::applyModifiers(StringRef Mods) {
  bool SawQuad = false;
  applyTypespec(SawQuad);

  for (const char Mod : Mods) {
    switch (Mod) {
    // No-ops: '.' leaves the type alone and '!' (the key type marker) is
    // handled elsewhere.
    case '.':
    case '!':
      break;

    // Kind changes.
    case 'v':
      Kind = Void;
      break;
    case 'S':
      Kind = SInt;
      break;
    case 'U':
      Kind = UInt;
      break;
    case 'F':
      Kind = Float;
      break;
    case 'P':
      Kind = Poly;
      break;
    case 'B':
      Kind = BFloat16;
      ElementBitwidth = 16;
      break;
    case 'p':
      // Poly becomes unsigned integer; every other kind is left alone.
      if (isPoly())
        Kind = UInt;
      break;

    // Element width changes.
    case '>':
      assert(ElementBitwidth < 128);
      ElementBitwidth *= 2;
      break;
    case '<':
      assert(ElementBitwidth > 8);
      ElementBitwidth /= 2;
      break;

    // Vector count: '1' means scalar, '2'..'4' give that many vectors.
    case '1':
      NumVectors = 0;
      break;
    case '2':
    case '3':
    case '4':
      NumVectors = static_cast<unsigned>(Mod - '0');
      break;

    // Pointer and const qualifiers.
    case '*':
      Pointer = true;
      break;
    case 'c':
      Constant = true;
      break;

    // Total width.
    case 'Q':
      Bitwidth = 128;
      break;
    case 'q':
      Bitwidth = 64;
      break;

    // Immediate: a scalar signed 32-bit integer.
    case 'I':
      Kind = SInt;
      Bitwidth = 32;
      ElementBitwidth = 32;
      NumVectors = 0;
      Immediate = true;
      break;

    default:
      llvm_unreachable("Unhandled character!");
    }
  }
}
933

934
//===----------------------------------------------------------------------===//
935
// Intrinsic implementation
936
//===----------------------------------------------------------------------===//
937

938
/// Return the next modifier (or parenthesised modifier group) of \p Proto
/// starting at \p Pos, and advance \p Pos past it.
///
/// Returns an empty StringRef once \p Pos has reached the end of \p Proto. For
/// a group written as "(...)", the returned text excludes the parentheses.
StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
  // Nothing left to consume.
  if (Pos == Proto.size())
    return StringRef();

  // A lone modifier character outside any group.
  if (Proto[Pos] != '(') {
    const unsigned Current = Pos;
    ++Pos;
    return Proto.substr(Current, 1);
  }

  // A group: everything between '(' and the next ')'.
  const size_t GroupBegin = Pos + 1;
  const size_t GroupEnd = Proto.find(')', GroupBegin);
  assert_with_loc(GroupEnd != StringRef::npos, "unmatched modifier group paren");
  Pos = GroupEnd + 1;
  return Proto.slice(GroupBegin, GroupEnd);
}
950

951
/// Return the type suffix used when mangling an intrinsic name for type \p T
/// under class kind \p CK.
///
/// The result is empty for ClassB intrinsics guarded by "neon", "bf16" for
/// BFloat16, and otherwise a one-letter kind code followed by the element size
/// in bits.
std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
  // ClassB "neon" builtins carry no type code at all. This early return is the
  // only place that case needs handling.
  if (CK == ClassB && TargetGuard == "neon")
    return "";

  if (T.isBFloat16())
    return "bf16";

  char TypeCode;
  if (T.isPoly())
    TypeCode = 'p';
  else if (T.isInteger())
    TypeCode = T.isSigned() ? 's' : 'u';
  else
    TypeCode = 'f';

  // ClassI does not distinguish signed, unsigned and polynomial: all become
  // 'i'. Only 'f' is left untouched.
  if (CK == ClassI && TypeCode != 'f')
    TypeCode = 'i';

  std::string S(1, TypeCode);
  S += utostr(T.getElementSizeInBits());
  return S;
}
991

992
std::string Intrinsic::getBuiltinTypeStr() {
993
  ClassKind LocalCK = getClassKind(true);
994
  std::string S;
995

996
  Type RetT = getReturnType();
997
  if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
998
      !RetT.isFloating() && !RetT.isBFloat16())
999
    RetT.makeInteger(RetT.getElementSizeInBits(), false);
1000

1001
  // Since the return value must be one type, return a vector type of the
1002
  // appropriate width which we will bitcast.  An exception is made for
1003
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
1004
  // fashion, storing them to a pointer arg.
1005
  if (RetT.getNumVectors() > 1) {
1006
    S += "vv*"; // void result with void* first argument
1007
  } else {
1008
    if (RetT.isPoly())
1009
      RetT.makeInteger(RetT.getElementSizeInBits(), false);
1010
    if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
1011
      RetT.makeSigned();
1012

1013
    if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
1014
      // Cast to vector of 8-bit elements.
1015
      RetT.makeInteger(8, true);
1016

1017
    S += RetT.builtin_str();
1018
  }
1019

1020
  for (unsigned I = 0; I < getNumParams(); ++I) {
1021
    Type T = getParamType(I);
1022
    if (T.isPoly())
1023
      T.makeInteger(T.getElementSizeInBits(), false);
1024

1025
    if (LocalCK == ClassB && !T.isScalar())
1026
      T.makeInteger(8, true);
1027
    // Halves always get converted to 8-bit elements.
1028
    if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
1029
      T.makeInteger(8, true);
1030

1031
    if (LocalCK == ClassI && T.isInteger())
1032
      T.makeSigned();
1033

1034
    if (hasImmediate() && getImmediateIdx() == I)
1035
      T.makeImmediate(32);
1036

1037
    S += T.builtin_str();
1038
  }
1039

1040
  // Extra constant integer to hold type class enum for this function, e.g. s8
1041
  if (LocalCK == ClassB)
1042
    S += "i";
1043

1044
  return S;
1045
}
1046

1047
std::string Intrinsic::getMangledName(bool ForceClassS) const {
1048
  // Check if the prototype has a scalar operand with the type of the vector
1049
  // elements.  If not, bitcasting the args will take care of arg checking.
1050
  // The actual signedness etc. will be taken care of with special enums.
1051
  ClassKind LocalCK = CK;
1052
  if (!protoHasScalar())
1053
    LocalCK = ClassB;
1054

1055
  return mangleName(Name, ForceClassS ? ClassS : LocalCK);
1056
}
1057

1058
std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
1059
  std::string typeCode = getInstTypeCode(BaseType, LocalCK);
1060
  std::string S = Name;
1061

1062
  if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
1063
      Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
1064
      Name == "vcvt_f32_bf16")
1065
    return Name;
1066

1067
  if (!typeCode.empty()) {
1068
    // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
1069
    if (Name.size() >= 3 && isdigit(Name.back()) &&
1070
        Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
1071
      S.insert(S.length() - 3, "_" + typeCode);
1072
    else
1073
      S += "_" + typeCode;
1074
  }
1075

1076
  if (BaseType != InBaseType) {
1077
    // A reinterpret - out the input base type at the end.
1078
    S += "_" + getInstTypeCode(InBaseType, LocalCK);
1079
  }
1080

1081
  if (LocalCK == ClassB && TargetGuard == "neon")
1082
    S += "_v";
1083

1084
  // Insert a 'q' before the first '_' character so that it ends up before
1085
  // _lane or _n on vector-scalar operations.
1086
  if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
1087
    size_t Pos = S.find('_');
1088
    S.insert(Pos, "q");
1089
  }
1090

1091
  char Suffix = '\0';
1092
  if (BaseType.isScalarForMangling()) {
1093
    switch (BaseType.getElementSizeInBits()) {
1094
    case 8: Suffix = 'b'; break;
1095
    case 16: Suffix = 'h'; break;
1096
    case 32: Suffix = 's'; break;
1097
    case 64: Suffix = 'd'; break;
1098
    default: llvm_unreachable("Bad suffix!");
1099
    }
1100
  }
1101
  if (Suffix != '\0') {
1102
    size_t Pos = S.find('_');
1103
    S.insert(Pos, &Suffix, 1);
1104
  }
1105

1106
  return S;
1107
}
1108

1109
std::string Intrinsic::replaceParamsIn(std::string S) {
1110
  while (S.find('$') != std::string::npos) {
1111
    size_t Pos = S.find('$');
1112
    size_t End = Pos + 1;
1113
    while (isalpha(S[End]))
1114
      ++End;
1115

1116
    std::string VarName = S.substr(Pos + 1, End - Pos - 1);
1117
    assert_with_loc(Variables.find(VarName) != Variables.end(),
1118
                    "Variable not defined!");
1119
    S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
1120
  }
1121

1122
  return S;
1123
}
1124

1125
void Intrinsic::initVariables() {
1126
  Variables.clear();
1127

1128
  // Modify the TypeSpec per-argument to get a concrete Type, and create
1129
  // known variables for each.
1130
  for (unsigned I = 1; I < Types.size(); ++I) {
1131
    char NameC = '0' + (I - 1);
1132
    std::string Name = "p";
1133
    Name.push_back(NameC);
1134

1135
    Variables[Name] = Variable(Types[I], Name + VariablePostfix);
1136
  }
1137
  RetVar = Variable(Types[0], "ret" + VariablePostfix);
1138
}
1139

1140
/// Emit the declaration head of the intrinsic to OS.
///
/// Macros are emitted as "#define name(args)" with untyped arguments. Inline
/// functions are emitted as "__ai [target attribute] rettype name(typed args)".
/// The emitted name is \p NamePrefix followed by the ClassS mangling of Name.
void Intrinsic::emitPrototype(StringRef NamePrefix) {
  if (UseMacro) {
    OS << "#define ";
  } else {
    OS << "__ai ";
    if (TargetGuard != "")
      OS << "__attribute__((target(\"" << TargetGuard << "\"))) ";
    OS << Types[0].str() << " ";
  }

  OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";

  for (unsigned I = 0; I < getNumParams(); ++I) {
    if (I != 0)
      OS << ", ";

    // Use utostr() so the key matches the "p" + utostr(I) lookups made by the
    // other emit* helpers even for ten or more parameters.
    const std::string ParamName = "p" + utostr(I);
    assert(Variables.find(ParamName) != Variables.end());
    Variable &V = Variables[ParamName];

    // Macro parameters carry no type.
    if (!UseMacro)
      OS << V.getType().str() << " ";
    OS << V.getName();
  }

  OS << ")";
}
1169

1170
void Intrinsic::emitOpeningBrace() {
1171
  if (UseMacro)
1172
    OS << " __extension__ ({";
1173
  else
1174
    OS << " {";
1175
  emitNewLine();
1176
}
1177

1178
void Intrinsic::emitClosingBrace() {
1179
  if (UseMacro)
1180
    OS << "})";
1181
  else
1182
    OS << "}";
1183
}
1184

1185
void Intrinsic::emitNewLine() {
1186
  if (UseMacro)
1187
    OS << " \\\n";
1188
  else
1189
    OS << "\n";
1190
}
1191

1192
/// Emit code assigning to \p Dest the lanes of \p Src in reverse order, using
/// __builtin_shufflevector. For multi-vector types each ".val[K]" member is
/// reversed separately.
void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
  // Writes ", N-1, ..., 1, 0" where N is Dest's element count.
  auto EmitReversedIndices = [&] {
    for (int Lane = Dest.getType().getNumElements() - 1; Lane >= 0; --Lane)
      OS << ", " << Lane;
  };

  if (!(Dest.getType().getNumVectors() > 1)) {
    // Single vector: one shuffle of the whole variable.
    OS << "  " << Dest.getName() << " = __builtin_shufflevector("
       << Src.getName() << ", " << Src.getName();
    EmitReversedIndices();
    OS << ");";
    emitNewLine();
    return;
  }

  // Struct of vectors: one shuffle per member, each on its own line.
  emitNewLine();
  for (unsigned Member = 0; Member < Dest.getType().getNumVectors();
       ++Member) {
    OS << "  " << Dest.getName() << ".val[" << Member << "] = "
       << "__builtin_shufflevector(" << Src.getName() << ".val[" << Member
       << "], " << Src.getName() << ".val[" << Member << "]";
    EmitReversedIndices();
    OS << ");";
    emitNewLine();
  }
}
1215

1216
void Intrinsic::emitArgumentReversal() {
1217
  if (isBigEndianSafe())
1218
    return;
1219

1220
  // Reverse all vector arguments.
1221
  for (unsigned I = 0; I < getNumParams(); ++I) {
1222
    std::string Name = "p" + utostr(I);
1223
    std::string NewName = "rev" + utostr(I);
1224

1225
    Variable &V = Variables[Name];
1226
    Variable NewV(V.getType(), NewName + VariablePostfix);
1227

1228
    if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
1229
      continue;
1230

1231
    OS << "  " << NewV.getType().str() << " " << NewV.getName() << ";";
1232
    emitReverseVariable(NewV, V);
1233
    V = NewV;
1234
  }
1235
}
1236

1237
void Intrinsic::emitReturnVarDecl() {
1238
  assert(RetVar.getType() == Types[0]);
1239
  // Create a return variable, if we're not void.
1240
  if (!RetVar.getType().isVoid()) {
1241
    OS << "  " << RetVar.getType().str() << " " << RetVar.getName() << ";";
1242
    emitNewLine();
1243
  }
1244
}
1245

1246
void Intrinsic::emitReturnReversal() {
1247
  if (isBigEndianSafe())
1248
    return;
1249
  if (!getReturnType().isVector() || getReturnType().isVoid() ||
1250
      getReturnType().getNumElements() == 1)
1251
    return;
1252
  emitReverseVariable(RetVar, RetVar);
1253
}
1254

1255
void Intrinsic::emitShadowedArgs() {
1256
  // Macro arguments are not type-checked like inline function arguments,
1257
  // so assign them to local temporaries to get the right type checking.
1258
  if (!UseMacro)
1259
    return;
1260

1261
  for (unsigned I = 0; I < getNumParams(); ++I) {
1262
    // Do not create a temporary for an immediate argument.
1263
    // That would defeat the whole point of using a macro!
1264
    if (getParamType(I).isImmediate())
1265
      continue;
1266
    // Do not create a temporary for pointer arguments. The input
1267
    // pointer may have an alignment hint.
1268
    if (getParamType(I).isPointer())
1269
      continue;
1270

1271
    std::string Name = "p" + utostr(I);
1272

1273
    assert(Variables.find(Name) != Variables.end());
1274
    Variable &V = Variables[Name];
1275

1276
    std::string NewName = "s" + utostr(I);
1277
    Variable V2(V.getType(), NewName + VariablePostfix);
1278

1279
    OS << "  " << V2.getType().str() << " " << V2.getName() << " = "
1280
       << V.getName() << ";";
1281
    emitNewLine();
1282

1283
    V = V2;
1284
  }
1285
}
1286

1287
bool Intrinsic::protoHasScalar() const {
1288
  return llvm::any_of(
1289
      Types, [](const Type &T) { return T.isScalar() && !T.isImmediate(); });
1290
}
1291

1292
void Intrinsic::emitBodyAsBuiltinCall() {
1293
  std::string S;
1294

1295
  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
1296
  // sret-like argument.
1297
  bool SRet = getReturnType().getNumVectors() >= 2;
1298

1299
  StringRef N = Name;
1300
  ClassKind LocalCK = CK;
1301
  if (!protoHasScalar())
1302
    LocalCK = ClassB;
1303

1304
  if (!getReturnType().isVoid() && !SRet)
1305
    S += "(" + RetVar.getType().str() + ") ";
1306

1307
  S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "(";
1308

1309
  if (SRet)
1310
    S += "&" + RetVar.getName() + ", ";
1311

1312
  for (unsigned I = 0; I < getNumParams(); ++I) {
1313
    Variable &V = Variables["p" + utostr(I)];
1314
    Type T = V.getType();
1315

1316
    // Handle multiple-vector values specially, emitting each subvector as an
1317
    // argument to the builtin.
1318
    if (T.getNumVectors() > 1) {
1319
      // Check if an explicit cast is needed.
1320
      std::string Cast;
1321
      if (LocalCK == ClassB) {
1322
        Type T2 = T;
1323
        T2.makeOneVector();
1324
        T2.makeInteger(8, /*Sign=*/true);
1325
        Cast = "(" + T2.str() + ")";
1326
      }
1327

1328
      for (unsigned J = 0; J < T.getNumVectors(); ++J)
1329
        S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
1330
      continue;
1331
    }
1332

1333
    std::string Arg = V.getName();
1334
    Type CastToType = T;
1335

1336
    // Check if an explicit cast is needed.
1337
    if (CastToType.isVector() &&
1338
        (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) {
1339
      CastToType.makeInteger(8, true);
1340
      Arg = "(" + CastToType.str() + ")" + Arg;
1341
    } else if (CastToType.isVector() && LocalCK == ClassI) {
1342
      if (CastToType.isInteger())
1343
        CastToType.makeSigned();
1344
      Arg = "(" + CastToType.str() + ")" + Arg;
1345
    }
1346

1347
    S += Arg + ", ";
1348
  }
1349

1350
  // Extra constant integer to hold type class enum for this function, e.g. s8
1351
  if (getClassKind(true) == ClassB) {
1352
    S += utostr(getPolymorphicKeyType().getNeonEnum());
1353
  } else {
1354
    // Remove extraneous ", ".
1355
    S.pop_back();
1356
    S.pop_back();
1357
  }
1358
  S += ");";
1359

1360
  std::string RetExpr;
1361
  if (!SRet && !RetVar.getType().isVoid())
1362
    RetExpr = RetVar.getName() + " = ";
1363

1364
  OS << "  " << RetExpr << S;
1365
  emitNewLine();
1366
}
1367

1368
/// Emit the statements making up the intrinsic body.
///
/// With no body definition a builtin call is emitted instead. Otherwise each
/// body item becomes one output line: strings have their "$name" placeholders
/// substituted, DAGs are lowered through a DagEmitter using \p CallPrefix. For
/// a non-void intrinsic the last line is turned into an assignment to the
/// return variable.
void Intrinsic::emitBody(StringRef CallPrefix) {
  if (!Body || Body->getValues().empty()) {
    // Nothing specific to output - must output a builtin.
    emitBodyAsBuiltinCall();
    return;
  }

  // We have a list of "things to output". The last should be returned.
  std::vector<std::string> Lines;
  for (auto *Item : Body->getValues()) {
    if (auto *Str = dyn_cast<StringInit>(Item)) {
      Lines.push_back(replaceParamsIn(Str->getAsString()));
      continue;
    }
    if (auto *Dag = dyn_cast<DagInit>(Item)) {
      DagEmitter DE(*this, CallPrefix);
      Lines.push_back(DE.emitDag(Dag).second + ";");
    }
  }

  assert(!Lines.empty() && "Empty def?");
  if (!RetVar.getType().isVoid())
    Lines.back().insert(0, RetVar.getName() + " = ");

  for (const std::string &Line : Lines) {
    OS << "  " << Line;
    emitNewLine();
  }
}
1396

1397
void Intrinsic::emitReturn() {
1398
  if (RetVar.getType().isVoid())
1399
    return;
1400
  if (UseMacro)
1401
    OS << "  " << RetVar.getName() << ";";
1402
  else
1403
    OS << "  return " << RetVar.getName() << ";";
1404
  emitNewLine();
1405
}
1406

1407
/// Lower the DAG \p DI by dispatching on the name of its operator def.
///
/// Returns the resulting type together with the emitted C expression. An
/// unknown operator is reported through assert_with_loc.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
  // At this point we should only be seeing a def.
  const std::string OpName = cast<DefInit>(DI->getOperator())->getAsString();

  // Operators that share a handler and differ only by a flag.
  if (OpName == "cast" || OpName == "bitcast")
    return emitDagCast(DI, OpName == "bitcast");
  if (OpName == "call" || OpName == "call_mangled")
    return emitDagCall(DI, OpName == "call_mangled");

  // Operators with a dedicated single-argument handler.
  using Handler = std::pair<Type, std::string> (DagEmitter::*)(DagInit *);
  const std::pair<const char *, Handler> Handlers[] = {
      {"shuffle", &DagEmitter::emitDagShuffle},
      {"dup", &DagEmitter::emitDagDup},
      {"dup_typed", &DagEmitter::emitDagDupTyped},
      {"splat", &DagEmitter::emitDagSplat},
      {"save_temp", &DagEmitter::emitDagSaveTemp},
      {"op", &DagEmitter::emitDagOp},
      {"name_replace", &DagEmitter::emitDagNameReplace},
      {"literal", &DagEmitter::emitDagLiteral},
  };
  for (const auto &Entry : Handlers)
    if (OpName == Entry.first)
      return (this->*Entry.second)(DI);

  assert_with_loc(false, "Unknown operation!");
  return std::make_pair(Type::getVoid(), "");
}
1435

1436
/// Lower "(op OPERATOR, a)" to "OPERATORa" and "(op OPERATOR, a, b)" to
/// "a OPERATOR b". The result has the type of the first operand; for the
/// binary form both operand types must be equal.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
  const std::string Op =
      cast<StringInit>(DI->getArg(0))->getAsUnquotedString();

  // Lowers the operand at position Idx of the DAG.
  auto EmitOperand = [&](unsigned Idx) {
    return emitDagArg(DI->getArg(Idx), std::string(DI->getArgNameStr(Idx)));
  };

  // Unary op.
  if (DI->getNumArgs() == 2) {
    std::pair<Type, std::string> Operand = EmitOperand(1);
    return std::make_pair(Operand.first, Op + Operand.second);
  }

  // Binary op.
  assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
  std::pair<Type, std::string> LHS = EmitOperand(1);
  std::pair<Type, std::string> RHS = EmitOperand(2);
  assert_with_loc(LHS.first == RHS.first, "Argument type mismatch!");
  return std::make_pair(LHS.first, LHS.second + " " + Op + " " + RHS.second);
}
1453

1454
std::pair<Type, std::string>
1455
Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
1456
  std::vector<Type> Types;
1457
  std::vector<std::string> Values;
1458
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1459
    std::pair<Type, std::string> R =
1460
        emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1)));
1461
    Types.push_back(R.first);
1462
    Values.push_back(R.second);
1463
  }
1464

1465
  // Look up the called intrinsic.
1466
  std::string N;
1467
  if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
1468
    N = SI->getAsUnquotedString();
1469
  else
1470
    N = emitDagArg(DI->getArg(0), "").second;
1471
  std::optional<std::string> MangledName;
1472
  if (MatchMangledName) {
1473
    if (Intr.getRecord()->getValueAsBit("isLaneQ"))
1474
      N += "q";
1475
    MangledName = Intr.mangleName(N, ClassS);
1476
  }
1477
  Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName);
1478

1479
  // Make sure the callee is known as an early def.
1480
  Callee.setNeededEarly();
1481
  Intr.Dependencies.insert(&Callee);
1482

1483
  // Now create the call itself.
1484
  std::string S;
1485
  if (!Callee.isBigEndianSafe())
1486
    S += CallPrefix.str();
1487
  S += Callee.getMangledName(true) + "(";
1488
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1489
    if (I != 0)
1490
      S += ", ";
1491
    S += Values[I];
1492
  }
1493
  S += ")";
1494

1495
  return std::make_pair(Callee.getReturnType(), S);
1496
}
1497

1498
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
1499
                                                                bool IsBitCast){
1500
  // (cast MOD* VAL) -> cast VAL to type given by MOD.
1501
  std::pair<Type, std::string> R =
1502
      emitDagArg(DI->getArg(DI->getNumArgs() - 1),
1503
                 std::string(DI->getArgNameStr(DI->getNumArgs() - 1)));
1504
  Type castToType = R.first;
1505
  for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
1506

1507
    // MOD can take several forms:
1508
    //   1. $X - take the type of parameter / variable X.
1509
    //   2. The value "R" - take the type of the return type.
1510
    //   3. a type string
1511
    //   4. The value "U" or "S" to switch the signedness.
1512
    //   5. The value "H" or "D" to half or double the bitwidth.
1513
    //   6. The value "8" to convert to 8-bit (signed) integer lanes.
1514
    if (!DI->getArgNameStr(ArgIdx).empty()) {
1515
      assert_with_loc(Intr.Variables.find(std::string(
1516
                          DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(),
1517
                      "Variable not found");
1518
      castToType =
1519
          Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType();
1520
    } else {
1521
      StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
1522
      assert_with_loc(SI, "Expected string type or $Name for cast type");
1523

1524
      if (SI->getAsUnquotedString() == "R") {
1525
        castToType = Intr.getReturnType();
1526
      } else if (SI->getAsUnquotedString() == "U") {
1527
        castToType.makeUnsigned();
1528
      } else if (SI->getAsUnquotedString() == "S") {
1529
        castToType.makeSigned();
1530
      } else if (SI->getAsUnquotedString() == "H") {
1531
        castToType.halveLanes();
1532
      } else if (SI->getAsUnquotedString() == "D") {
1533
        castToType.doubleLanes();
1534
      } else if (SI->getAsUnquotedString() == "8") {
1535
        castToType.makeInteger(8, true);
1536
      } else if (SI->getAsUnquotedString() == "32") {
1537
        castToType.make32BitElement();
1538
      } else {
1539
        castToType = Type::fromTypedefName(SI->getAsUnquotedString());
1540
        assert_with_loc(!castToType.isVoid(), "Unknown typedef");
1541
      }
1542
    }
1543
  }
1544

1545
  std::string S;
1546
  if (IsBitCast) {
1547
    // Emit a reinterpret cast. The second operand must be an lvalue, so create
1548
    // a temporary.
1549
    std::string N = "reint";
1550
    unsigned I = 0;
1551
    while (Intr.Variables.find(N) != Intr.Variables.end())
1552
      N = "reint" + utostr(++I);
1553
    Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);
1554

1555
    Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
1556
            << R.second << ";";
1557
    Intr.emitNewLine();
1558

1559
    S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
1560
  } else {
1561
    // Emit a normal (static) cast.
1562
    S = "(" + castToType.str() + ")(" + R.second + ")";
1563
  }
1564

1565
  return std::make_pair(castToType, S);
1566
}
1567

1568
/// Lower "(shuffle arg1, arg2, sequence)" to a __builtin_shufflevector call.
///
/// The sequence is evaluated with SetTheory, extended by the "lowhalf",
/// "highhalf" and "rev" operators and the mask0/mask1 expander defined below.
/// The result type is arg1's type, with its lanes doubled or halved when the
/// mask has twice or half as many entries as arg1 has elements.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
  // See the documentation in arm_neon.td for a description of these operators.

  // Keeps the first half of the evaluated operand set.
  class LowHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Operands;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Operands, Loc);
      Elts.insert(Operands.begin(), Operands.begin() + (Operands.size() / 2));
    }
  };

  // Keeps the second half of the evaluated operand set.
  class HighHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Operands;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Operands, Loc);
      Elts.insert(Operands.begin() + (Operands.size() / 2), Operands.end());
    }
  };

  // Reverses the operand set in groups. The group length is the operator's
  // first (integer) argument divided by the element size.
  class Rev : public SetTheory::Operator {
    unsigned ElementSize;

  public:
    Rev(unsigned ElementSize) : ElementSize(ElementSize) {}

    void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Operands;
      ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Operands, Loc);

      int64_t GroupLen = cast<IntInit>(Expr->getArg(0))->getValue();
      GroupLen /= ElementSize;

      std::vector<Record *> Reversed;
      for (unsigned Base = 0; Base < Operands.size(); Base += GroupLen) {
        for (int Off = GroupLen - 1; Off >= 0; --Off) {
          Reversed.push_back(Operands[Base + Off]);
        }
      }

      Elts.insert(Reversed.begin(), Reversed.end());
    }
  };

  // Expands "mask0" to sv0..sv(N-1) and "mask1" to svN..sv(2N-1); any other
  // record is left alone.
  class MaskExpander : public SetTheory::Expander {
    unsigned N;

  public:
    MaskExpander(unsigned N) : N(N) {}

    void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override {
      unsigned FirstIdx = 0;
      if (R->getName() == "mask0")
        FirstIdx = 0;
      else if (R->getName() == "mask1")
        FirstIdx = N;
      else
        return;
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + FirstIdx)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> First =
      emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  std::pair<Type, std::string> Second =
      emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  assert_with_loc(First.first == Second.first,
                  "Different types in arguments to shuffle!");

  SetTheory ST;
  SetTheory::RecSet Mask;
  ST.addOperator("lowhalf", std::make_unique<LowHalf>());
  ST.addOperator("highhalf", std::make_unique<HighHalf>());
  ST.addOperator("rev",
                 std::make_unique<Rev>(First.first.getElementSizeInBits()));
  ST.addExpander("MaskExpand",
                 std::make_unique<MaskExpander>(First.first.getNumElements()));
  ST.evaluate(DI->getArg(2), Mask, std::nullopt);

  // Every mask entry is a record named "svK"; K is the shuffle index.
  std::string S =
      "__builtin_shufflevector(" + First.second + ", " + Second.second;
  for (auto &Entry : Mask) {
    StringRef EntryName = Entry->getName();
    assert_with_loc(EntryName.starts_with("sv"),
                    "Incorrect element kind in shuffle mask!");
    S += ", " + EntryName.drop_front(2).str();
  }
  S += ")";

  // Recalculate the return type - the shuffle may have halved or doubled it.
  Type T(First.first);
  if (Mask.size() > T.getNumElements()) {
    assert_with_loc(
        Mask.size() == T.getNumElements() * 2,
        "Can only double or half the number of elements in a shuffle!");
    T.doubleLanes();
  } else if (Mask.size() < T.getNumElements()) {
    assert_with_loc(
        Mask.size() == T.getNumElements() / 2,
        "Can only double or half the number of elements in a shuffle!");
    T.halveLanes();
  }

  return std::make_pair(T, S);
}
1677

1678
/// Lower "(dup scalar)" to a compound literal of the intrinsic's base type
/// whose every element is the scalar expression.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
  std::pair<Type, std::string> Scalar =
      emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  assert_with_loc(Scalar.first.isScalar(), "dup() expects a scalar argument");

  Type T = Intr.getBaseType();
  assert_with_loc(T.isVector(), "dup() used but default type is scalar!");

  // "(type) {x, x, ..., x}" with one x per element.
  std::string S = "(" + T.str() + ") {";
  for (unsigned Lane = 0; Lane < T.getNumElements(); ++Lane) {
    S += (Lane != 0 ? ", " : "");
    S += Scalar.second;
  }
  S += "}";

  return std::make_pair(T, S);
}
1696

1697
/// Lower "(dup_typed TYPE, scalar)" to a compound literal of TYPE whose every
/// element is the scalar expression. TYPE is either a typedef name given as a
/// string, or an argument whose type is taken.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments");
  std::pair<Type, std::string> Scalar =
      emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  assert_with_loc(Scalar.first.isScalar(),
                  "dup_typed() requires a scalar as the second argument");

  Type T;
  if (StringInit *TypeName = dyn_cast<StringInit>(DI->getArg(0))) {
    // If the type argument is a constant string, construct the type directly.
    T = Type::fromTypedefName(TypeName->getAsUnquotedString());
    assert_with_loc(!T.isVoid(), "Unknown typedef");
  } else {
    T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first;
  }
  assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!");

  // "(type) {x, x, ..., x}" with one x per element.
  std::string S = "(" + T.str() + ") {";
  for (unsigned Lane = 0; Lane < T.getNumElements(); ++Lane) {
    S += (Lane != 0 ? ", " : "");
    S += Scalar.second;
  }
  S += "}";

  return std::make_pair(T, S);
}
1722

1723
/// Lower "(splat vector, index)" to a __builtin_shufflevector of the vector
/// with itself, repeating the index expression once per element of the
/// intrinsic's base type. The result has the intrinsic's base type.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
  std::pair<Type, std::string> Vec =
      emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  std::pair<Type, std::string> Index =
      emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));

  assert_with_loc(Index.first.isScalar(),
                  "splat() requires a scalar int as the second argument");

  std::string S = "__builtin_shufflevector(" + Vec.second + ", " + Vec.second;
  for (unsigned Lane = 0; Lane < Intr.getBaseType().getNumElements(); ++Lane)
    S += ", " + Index.second;
  S += ")";

  return std::make_pair(Intr.getBaseType(), S);
}
1741

1742
/// Lower "(save_temp $name, value)" to a declaration "type name = value" and
/// register the new variable under "name". The name must not be defined yet.
/// The result has void type.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");

  // The value is lowered before the name is inspected.
  std::pair<Type, std::string> Value =
      emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  assert_with_loc(!Value.first.isVoid(),
                  "Argument to save_temp() must have non-void type!");

  const std::string TempName(DI->getArgNameStr(0));
  assert_with_loc(!TempName.empty(),
                  "save_temp() expects a name as the first argument");
  assert_with_loc(Intr.Variables.find(TempName) == Intr.Variables.end(),
                  "Variable already defined!");

  Variable &Temp = Intr.Variables[TempName];
  Temp = Variable(Value.first, TempName + Intr.VariablePostfix);

  std::string Decl =
      Value.first.str() + " " + Temp.getName() + " = " + Value.second;
  return std::make_pair(Type::getVoid(), Decl);
}
1763

1764
std::pair<Type, std::string>
1765
Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
1766
  std::string S = Intr.Name;
1767

1768
  assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
1769
  std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1770
  std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
1771

1772
  size_t Idx = S.find(ToReplace);
1773

1774
  assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
1775
  S.replace(Idx, ToReplace.size(), ReplaceWith);
1776

1777
  return std::make_pair(Type::getVoid(), S);
1778
}
1779

1780
/// Lower "(literal TYPE, VALUE)": VALUE is passed through verbatim and given
/// the type named by the typedef name TYPE.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
  const std::string TypeName =
      cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  const std::string Text =
      cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
  return std::make_pair(Type::fromTypedefName(TypeName), Text);
}
1785

1786
std::pair<Type, std::string>
1787
Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
1788
  if (!ArgName.empty()) {
1789
    assert_with_loc(!Arg->isComplete(),
1790
                    "Arguments must either be DAGs or names, not both!");
1791
    assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
1792
                    "Variable not defined!");
1793
    Variable &V = Intr.Variables[ArgName];
1794
    return std::make_pair(V.getType(), V.getName());
1795
  }
1796

1797
  assert(Arg && "Neither ArgName nor Arg?!");
1798
  DagInit *DI = dyn_cast<DagInit>(Arg);
1799
  assert_with_loc(DI, "Arguments must either be DAGs or names!");
1800

1801
  return emitDag(DI);
1802
}
1803

1804
std::string Intrinsic::generate() {
1805
  // Avoid duplicated code for big and little endian
1806
  if (isBigEndianSafe()) {
1807
    generateImpl(false, "", "");
1808
    return OS.str();
1809
  }
1810
  // Little endian intrinsics are simple and don't require any argument
1811
  // swapping.
1812
  OS << "#ifdef __LITTLE_ENDIAN__\n";
1813

1814
  generateImpl(false, "", "");
1815

1816
  OS << "#else\n";
1817

1818
  // Big endian intrinsics are more complex. The user intended these
1819
  // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
1820
  // but we load as-if (V)LD1. So we should swap all arguments and
1821
  // swap the return value too.
1822
  //
1823
  // If we call sub-intrinsics, we should call a version that does
1824
  // not re-swap the arguments!
1825
  generateImpl(true, "", "__noswap_");
1826

1827
  // If we're needed early, create a non-swapping variant for
1828
  // big-endian.
1829
  if (NeededEarly) {
1830
    generateImpl(false, "__noswap_", "__noswap_");
1831
  }
1832
  OS << "#endif\n\n";
1833

1834
  return OS.str();
1835
}
1836

1837
void Intrinsic::generateImpl(bool ReverseArguments,
1838
                             StringRef NamePrefix, StringRef CallPrefix) {
1839
  CurrentRecord = R;
1840

1841
  // If we call a macro, our local variables may be corrupted due to
1842
  // lack of proper lexical scoping. So, add a globally unique postfix
1843
  // to every variable.
1844
  //
1845
  // indexBody() should have set up the Dependencies set by now.
1846
  for (auto *I : Dependencies)
1847
    if (I->UseMacro) {
1848
      VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
1849
      break;
1850
    }
1851

1852
  initVariables();
1853

1854
  emitPrototype(NamePrefix);
1855

1856
  if (IsUnavailable) {
1857
    OS << " __attribute__((unavailable));";
1858
  } else {
1859
    emitOpeningBrace();
1860
    // Emit return variable declaration first as to not trigger
1861
    // -Wdeclaration-after-statement.
1862
    emitReturnVarDecl();
1863
    emitShadowedArgs();
1864
    if (ReverseArguments)
1865
      emitArgumentReversal();
1866
    emitBody(CallPrefix);
1867
    if (ReverseArguments)
1868
      emitReturnReversal();
1869
    emitReturn();
1870
    emitClosingBrace();
1871
  }
1872
  OS << "\n";
1873

1874
  CurrentRecord = nullptr;
1875
}
1876

1877
void Intrinsic::indexBody() {
1878
  CurrentRecord = R;
1879

1880
  initVariables();
1881
  // Emit return variable declaration first as to not trigger
1882
  // -Wdeclaration-after-statement.
1883
  emitReturnVarDecl();
1884
  emitBody("");
1885
  OS.str("");
1886

1887
  CurrentRecord = nullptr;
1888
}
1889

1890
//===----------------------------------------------------------------------===//
1891
// NeonEmitter implementation
1892
//===----------------------------------------------------------------------===//
1893

1894
/// Find the unique intrinsic called \p Name whose parameter types are exactly
/// \p Types.
///
/// When \p MangledName is provided, candidates whose getMangledName(true)
/// differs from it are rejected. Finding no candidate, or more than one, is
/// reported through assert_with_loc together with the list of overloads that
/// were considered.
Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
                                     std::optional<std::string> MangledName) {
  // First, look up the name in the intrinsic map.
  auto MapIt = IntrinsicMap.find(Name.str());
  assert_with_loc(MapIt != IntrinsicMap.end(),
                  ("Intrinsic '" + Name + "' not found!").str());
  auto &Overloads = MapIt->second;
  std::vector<Intrinsic *> Matches;

  // Build the diagnostic text up front; it is only shown on failure.
  std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
  const char *Separator = "";
  for (const Type &Requested : Types) {
    ErrMsg += Separator;
    ErrMsg += Requested.str();
    Separator = ", ";
  }
  ErrMsg += ")'\n";
  ErrMsg += "Available overloads:\n";

  // Now, look through each intrinsic implementation and see if the types are
  // compatible. Every candidate is listed in the diagnostic, matching or not.
  for (auto &Candidate : Overloads) {
    ErrMsg += "  - " + Candidate.getReturnType().str() + " " +
              Candidate.getMangledName();
    ErrMsg += "(";
    for (unsigned P = 0; P < Candidate.getNumParams(); ++P) {
      if (P != 0)
        ErrMsg += ", ";
      ErrMsg += Candidate.getParamType(P).str();
    }
    ErrMsg += ")\n";

    if (MangledName && *MangledName != Candidate.getMangledName(true))
      continue;

    if (Candidate.getNumParams() != Types.size())
      continue;

    // Compare the parameter lists position by position, stopping at the
    // first mismatch.
    bool AllParamsMatch = true;
    for (unsigned P = 0; AllParamsMatch && P != Types.size(); ++P)
      AllParamsMatch = Types[P] == Candidate.getParamType(P);

    if (AllParamsMatch)
      Matches.push_back(&Candidate);
  }

  assert_with_loc(!Matches.empty(),
                  "No compatible intrinsic found - " + ErrMsg);
  assert_with_loc(Matches.size() == 1, "Multiple overloads found - " + ErrMsg);

  return *Matches.front();
}
1945

1946
void NeonEmitter::createIntrinsic(Record *R,
1947
                                  SmallVectorImpl<Intrinsic *> &Out) {
1948
  std::string Name = std::string(R->getValueAsString("Name"));
1949
  std::string Proto = std::string(R->getValueAsString("Prototype"));
1950
  std::string Types = std::string(R->getValueAsString("Types"));
1951
  Record *OperationRec = R->getValueAsDef("Operation");
1952
  bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
1953
  std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
1954
  std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
1955
  bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
1956
  std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
1957

1958
  // Set the global current record. This allows assert_with_loc to produce
1959
  // decent location information even when highly nested.
1960
  CurrentRecord = R;
1961

1962
  ListInit *Body = OperationRec->getValueAsListInit("Ops");
1963

1964
  std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
1965

1966
  ClassKind CK = ClassNone;
1967
  if (R->getSuperClasses().size() >= 2)
1968
    CK = ClassMap[R->getSuperClasses()[1].first];
1969

1970
  std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
1971
  if (!CartesianProductWith.empty()) {
1972
    std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith);
1973
    for (auto TS : TypeSpecs) {
1974
      Type DefaultT(TS, ".");
1975
      for (auto SrcTS : ProductTypeSpecs) {
1976
        Type DefaultSrcT(SrcTS, ".");
1977
        if (TS == SrcTS ||
1978
            DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
1979
          continue;
1980
        NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
1981
      }
1982
    }
1983
  } else {
1984
    for (auto TS : TypeSpecs) {
1985
      NewTypeSpecs.push_back(std::make_pair(TS, TS));
1986
    }
1987
  }
1988

1989
  llvm::sort(NewTypeSpecs);
1990
  NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
1991
		     NewTypeSpecs.end());
1992
  auto &Entry = IntrinsicMap[Name];
1993

1994
  for (auto &I : NewTypeSpecs) {
1995
    Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
1996
                       ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
1997
    Out.push_back(&Entry.back());
1998
  }
1999

2000
  CurrentRecord = nullptr;
2001
}
2002

2003
/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
2004
/// declaration of builtins, checking for unique builtin declarations.
2005
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2006
                                 SmallVectorImpl<Intrinsic *> &Defs) {
2007
  OS << "#ifdef GET_NEON_BUILTINS\n";
2008

2009
  // We only want to emit a builtin once, and we want to emit them in
2010
  // alphabetical order, so use a std::set.
2011
  std::set<std::pair<std::string, std::string>> Builtins;
2012

2013
  for (auto *Def : Defs) {
2014
    if (Def->hasBody())
2015
      continue;
2016

2017
    std::string S = "__builtin_neon_" + Def->getMangledName() + ", \"";
2018
    S += Def->getBuiltinTypeStr();
2019
    S += "\", \"n\"";
2020

2021
    Builtins.emplace(S, Def->getTargetGuard());
2022
  }
2023

2024
  for (auto &S : Builtins) {
2025
    if (S.second == "")
2026
      OS << "BUILTIN(";
2027
    else
2028
      OS << "TARGET_BUILTIN(";
2029
    OS << S.first;
2030
    if (S.second == "")
2031
      OS << ")\n";
2032
    else
2033
      OS << ", \"" << S.second << "\")\n";
2034
  }
2035

2036
  OS << "#endif\n\n";
2037
}
2038

2039
void NeonEmitter::genStreamingSVECompatibleList(
2040
    raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2041
  OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n";
2042

2043
  std::set<std::string> Emitted;
2044
  for (auto *Def : Defs) {
2045
    // If the def has a body (that is, it has Operation DAGs), it won't call
2046
    // __builtin_neon_* so we don't need to generate a definition for it.
2047
    if (Def->hasBody())
2048
      continue;
2049

2050
    std::string Name = Def->getMangledName();
2051
    if (Emitted.find(Name) != Emitted.end())
2052
      continue;
2053

2054
    // FIXME: We should make exceptions here for some NEON builtins that are
2055
    // permitted in streaming mode.
2056
    OS << "case NEON::BI__builtin_neon_" << Name
2057
       << ": BuiltinType = ArmNonStreaming; break;\n";
2058
    Emitted.insert(Name);
2059
  }
2060
  OS << "#endif\n\n";
2061
}
2062

2063
/// Generate the ARM and AArch64 overloaded type checking code for
2064
/// SemaChecking.cpp, checking for unique builtin declarations.
2065
void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2066
                                           SmallVectorImpl<Intrinsic *> &Defs) {
2067
  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2068

2069
  // We record each overload check line before emitting because subsequent Inst
2070
  // definitions may extend the number of permitted types (i.e. augment the
2071
  // Mask). Use std::map to avoid sorting the table by hash number.
2072
  struct OverloadInfo {
2073
    uint64_t Mask = 0ULL;
2074
    int PtrArgNum = 0;
2075
    bool HasConstPtr = false;
2076
    OverloadInfo() = default;
2077
  };
2078
  std::map<std::string, OverloadInfo> OverloadMap;
2079

2080
  for (auto *Def : Defs) {
2081
    // If the def has a body (that is, it has Operation DAGs), it won't call
2082
    // __builtin_neon_* so we don't need to generate a definition for it.
2083
    if (Def->hasBody())
2084
      continue;
2085
    // Functions which have a scalar argument cannot be overloaded, no need to
2086
    // check them if we are emitting the type checking code.
2087
    if (Def->protoHasScalar())
2088
      continue;
2089

2090
    uint64_t Mask = 0ULL;
2091
    Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();
2092

2093
    // Check if the function has a pointer or const pointer argument.
2094
    int PtrArgNum = -1;
2095
    bool HasConstPtr = false;
2096
    for (unsigned I = 0; I < Def->getNumParams(); ++I) {
2097
      const auto &Type = Def->getParamType(I);
2098
      if (Type.isPointer()) {
2099
        PtrArgNum = I;
2100
        HasConstPtr = Type.isConstPointer();
2101
      }
2102
    }
2103

2104
    // For sret builtins, adjust the pointer argument index.
2105
    if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
2106
      PtrArgNum += 1;
2107

2108
    std::string Name = Def->getName();
2109
    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2110
    // vst1_lane, vldap1_lane, and vstl1_lane intrinsics.  Using a pointer to
2111
    // the vector element type with one of those operations causes codegen to
2112
    // select an aligned load/store instruction.  If you want an unaligned
2113
    // operation, the pointer argument needs to have less alignment than element
2114
    // type, so just accept any pointer type.
2115
    if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" ||
2116
        Name == "vldap1_lane" || Name == "vstl1_lane") {
2117
      PtrArgNum = -1;
2118
      HasConstPtr = false;
2119
    }
2120

2121
    if (Mask) {
2122
      std::string Name = Def->getMangledName();
2123
      OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
2124
      OverloadInfo &OI = OverloadMap[Name];
2125
      OI.Mask |= Mask;
2126
      OI.PtrArgNum |= PtrArgNum;
2127
      OI.HasConstPtr = HasConstPtr;
2128
    }
2129
  }
2130

2131
  for (auto &I : OverloadMap) {
2132
    OverloadInfo &OI = I.second;
2133

2134
    OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
2135
    OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
2136
    if (OI.PtrArgNum >= 0)
2137
      OS << "; PtrArgNum = " << OI.PtrArgNum;
2138
    if (OI.HasConstPtr)
2139
      OS << "; HasConstPtr = true";
2140
    OS << "; break;\n";
2141
  }
2142
  OS << "#endif\n\n";
2143
}
2144

2145
void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2146
                                        SmallVectorImpl<Intrinsic *> &Defs) {
2147
  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2148

2149
  std::set<std::string> Emitted;
2150

2151
  for (auto *Def : Defs) {
2152
    if (Def->hasBody())
2153
      continue;
2154
    // Functions which do not have an immediate do not need to have range
2155
    // checking code emitted.
2156
    if (!Def->hasImmediate())
2157
      continue;
2158
    if (Emitted.find(Def->getMangledName()) != Emitted.end())
2159
      continue;
2160

2161
    std::string LowerBound, UpperBound;
2162

2163
    Record *R = Def->getRecord();
2164
    if (R->getValueAsBit("isVXAR")) {
2165
      //VXAR takes an immediate in the range [0, 63]
2166
      LowerBound = "0";
2167
      UpperBound = "63";
2168
    } else if (R->getValueAsBit("isVCVT_N")) {
2169
      // VCVT between floating- and fixed-point values takes an immediate
2170
      // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
2171
      LowerBound = "1";
2172
	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
2173
		  Def->getName().find('h') != std::string::npos)
2174
		// VCVTh operating on FP16 intrinsics in range [1, 16)
2175
		UpperBound = "15";
2176
	  else if (Def->getBaseType().getElementSizeInBits() == 32)
2177
        UpperBound = "31";
2178
	  else
2179
        UpperBound = "63";
2180
    } else if (R->getValueAsBit("isScalarShift")) {
2181
      // Right shifts have an 'r' in the name, left shifts do not. Convert
2182
      // instructions have the same bounds and right shifts.
2183
      if (Def->getName().find('r') != std::string::npos ||
2184
          Def->getName().find("cvt") != std::string::npos)
2185
        LowerBound = "1";
2186

2187
      UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
2188
    } else if (R->getValueAsBit("isShift")) {
2189
      // Builtins which are overloaded by type will need to have their upper
2190
      // bound computed at Sema time based on the type constant.
2191

2192
      // Right shifts have an 'r' in the name, left shifts do not.
2193
      if (Def->getName().find('r') != std::string::npos)
2194
        LowerBound = "1";
2195
      UpperBound = "RFT(TV, true)";
2196
    } else if (Def->getClassKind(true) == ClassB) {
2197
      // ClassB intrinsics have a type (and hence lane number) that is only
2198
      // known at runtime.
2199
      if (R->getValueAsBit("isLaneQ"))
2200
        UpperBound = "RFT(TV, false, true)";
2201
      else
2202
        UpperBound = "RFT(TV, false, false)";
2203
    } else {
2204
      // The immediate generally refers to a lane in the preceding argument.
2205
      assert(Def->getImmediateIdx() > 0);
2206
      Type T = Def->getParamType(Def->getImmediateIdx() - 1);
2207
      UpperBound = utostr(T.getNumElements() - 1);
2208
    }
2209

2210
    // Calculate the index of the immediate that should be range checked.
2211
    unsigned Idx = Def->getNumParams();
2212
    if (Def->hasImmediate())
2213
      Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
2214

2215
    OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
2216
       << "i = " << Idx << ";";
2217
    if (!LowerBound.empty())
2218
      OS << " l = " << LowerBound << ";";
2219
    if (!UpperBound.empty())
2220
      OS << " u = " << UpperBound << ";";
2221
    OS << " break;\n";
2222

2223
    Emitted.insert(Def->getMangledName());
2224
  }
2225

2226
  OS << "#endif\n\n";
2227
}
2228

2229
/// runHeader - Emit a file with sections defining:
2230
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2231
/// 2. the SemaChecking code for the type overload checking.
2232
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2233
void NeonEmitter::runHeader(raw_ostream &OS) {
2234
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2235

2236
  SmallVector<Intrinsic *, 128> Defs;
2237
  for (auto *R : RV)
2238
    createIntrinsic(R, Defs);
2239

2240
  // Generate shared BuiltinsXXX.def
2241
  genBuiltinsDef(OS, Defs);
2242

2243
  // Generate ARM overloaded type checking code for SemaChecking.cpp
2244
  genOverloadTypeCheckCode(OS, Defs);
2245

2246
  genStreamingSVECompatibleList(OS, Defs);
2247

2248
  // Generate ARM range checking code for shift/lane immediates.
2249
  genIntrinsicRangeCheckCode(OS, Defs);
2250
}
2251

2252
static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
2253
  std::string TypedefTypes(types);
2254
  std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
2255

2256
  // Emit vector typedefs.
2257
  bool InIfdef = false;
2258
  for (auto &TS : TDTypeVec) {
2259
    bool IsA64 = false;
2260
    Type T(TS, ".");
2261
    if (T.isDouble())
2262
      IsA64 = true;
2263

2264
    if (InIfdef && !IsA64) {
2265
      OS << "#endif\n";
2266
      InIfdef = false;
2267
    }
2268
    if (!InIfdef && IsA64) {
2269
      OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2270
      InIfdef = true;
2271
    }
2272

2273
    if (T.isPoly())
2274
      OS << "typedef __attribute__((neon_polyvector_type(";
2275
    else
2276
      OS << "typedef __attribute__((neon_vector_type(";
2277

2278
    Type T2 = T;
2279
    T2.makeScalar();
2280
    OS << T.getNumElements() << "))) ";
2281
    OS << T2.str();
2282
    OS << " " << T.str() << ";\n";
2283
  }
2284
  if (InIfdef)
2285
    OS << "#endif\n";
2286
  OS << "\n";
2287

2288
  // Emit struct typedefs.
2289
  InIfdef = false;
2290
  for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
2291
    for (auto &TS : TDTypeVec) {
2292
      bool IsA64 = false;
2293
      Type T(TS, ".");
2294
      if (T.isDouble())
2295
        IsA64 = true;
2296

2297
      if (InIfdef && !IsA64) {
2298
        OS << "#endif\n";
2299
        InIfdef = false;
2300
      }
2301
      if (!InIfdef && IsA64) {
2302
        OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2303
        InIfdef = true;
2304
      }
2305

2306
      const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
2307
      Type VT(TS, Mods);
2308
      OS << "typedef struct " << VT.str() << " {\n";
2309
      OS << "  " << T.str() << " val";
2310
      OS << "[" << NumMembers << "]";
2311
      OS << ";\n} ";
2312
      OS << VT.str() << ";\n";
2313
      OS << "\n";
2314
    }
2315
  }
2316
  if (InIfdef)
2317
    OS << "#endif\n";
2318
}
2319

2320
/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2321
/// is comprised of type definitions and function declarations.
2322
void NeonEmitter::run(raw_ostream &OS) {
2323
  OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
2324
        "------------------------------"
2325
        "---===\n"
2326
        " *\n"
2327
        " * Permission is hereby granted, free of charge, to any person "
2328
        "obtaining "
2329
        "a copy\n"
2330
        " * of this software and associated documentation files (the "
2331
        "\"Software\"),"
2332
        " to deal\n"
2333
        " * in the Software without restriction, including without limitation "
2334
        "the "
2335
        "rights\n"
2336
        " * to use, copy, modify, merge, publish, distribute, sublicense, "
2337
        "and/or sell\n"
2338
        " * copies of the Software, and to permit persons to whom the Software "
2339
        "is\n"
2340
        " * furnished to do so, subject to the following conditions:\n"
2341
        " *\n"
2342
        " * The above copyright notice and this permission notice shall be "
2343
        "included in\n"
2344
        " * all copies or substantial portions of the Software.\n"
2345
        " *\n"
2346
        " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2347
        "EXPRESS OR\n"
2348
        " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2349
        "MERCHANTABILITY,\n"
2350
        " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2351
        "SHALL THE\n"
2352
        " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2353
        "OTHER\n"
2354
        " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2355
        "ARISING FROM,\n"
2356
        " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2357
        "DEALINGS IN\n"
2358
        " * THE SOFTWARE.\n"
2359
        " *\n"
2360
        " *===-----------------------------------------------------------------"
2361
        "---"
2362
        "---===\n"
2363
        " */\n\n";
2364

2365
  OS << "#ifndef __ARM_NEON_H\n";
2366
  OS << "#define __ARM_NEON_H\n\n";
2367

2368
  OS << "#ifndef __ARM_FP\n";
2369
  OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
2370
        "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
2371
  OS << "#else\n\n";
2372

2373
  OS << "#include <stdint.h>\n\n";
2374

2375
  OS << "#include <arm_bf16.h>\n";
2376

2377
  OS << "#include <arm_vector_types.h>\n";
2378

2379
  // For now, signedness of polynomial types depends on target
2380
  OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2381
  OS << "typedef uint8_t poly8_t;\n";
2382
  OS << "typedef uint16_t poly16_t;\n";
2383
  OS << "typedef uint64_t poly64_t;\n";
2384
  OS << "typedef __uint128_t poly128_t;\n";
2385
  OS << "#else\n";
2386
  OS << "typedef int8_t poly8_t;\n";
2387
  OS << "typedef int16_t poly16_t;\n";
2388
  OS << "typedef int64_t poly64_t;\n";
2389
  OS << "#endif\n";
2390
  emitNeonTypeDefs("PcQPcPsQPsPlQPl", OS);
2391

2392
  OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2393
        "__nodebug__))\n\n";
2394

2395
  SmallVector<Intrinsic *, 128> Defs;
2396
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2397
  for (auto *R : RV)
2398
    createIntrinsic(R, Defs);
2399

2400
  for (auto *I : Defs)
2401
    I->indexBody();
2402

2403
  llvm::stable_sort(Defs, llvm::deref<std::less<>>());
2404

2405
  // Only emit a def when its requirements have been met.
2406
  // FIXME: This loop could be made faster, but it's fast enough for now.
2407
  bool MadeProgress = true;
2408
  std::string InGuard;
2409
  while (!Defs.empty() && MadeProgress) {
2410
    MadeProgress = false;
2411

2412
    for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2413
         I != Defs.end(); /*No step*/) {
2414
      bool DependenciesSatisfied = true;
2415
      for (auto *II : (*I)->getDependencies()) {
2416
        if (llvm::is_contained(Defs, II))
2417
          DependenciesSatisfied = false;
2418
      }
2419
      if (!DependenciesSatisfied) {
2420
        // Try the next one.
2421
        ++I;
2422
        continue;
2423
      }
2424

2425
      // Emit #endif/#if pair if needed.
2426
      if ((*I)->getArchGuard() != InGuard) {
2427
        if (!InGuard.empty())
2428
          OS << "#endif\n";
2429
        InGuard = (*I)->getArchGuard();
2430
        if (!InGuard.empty())
2431
          OS << "#if " << InGuard << "\n";
2432
      }
2433

2434
      // Actually generate the intrinsic code.
2435
      OS << (*I)->generate();
2436

2437
      MadeProgress = true;
2438
      I = Defs.erase(I);
2439
    }
2440
  }
2441
  assert(Defs.empty() && "Some requirements were not satisfied!");
2442
  if (!InGuard.empty())
2443
    OS << "#endif\n";
2444

2445
  OS << "\n";
2446
  OS << "#undef __ai\n\n";
2447
  OS << "#endif /* if !defined(__ARM_NEON) */\n";
2448
  OS << "#endif /* ifndef __ARM_FP */\n";
2449
}
2450

2451
/// run - Read the records in arm_fp16.td and output arm_fp16.h.  arm_fp16.h
2452
/// is comprised of type definitions and function declarations.
2453
void NeonEmitter::runFP16(raw_ostream &OS) {
2454
  OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
2455
        "------------------------------"
2456
        "---===\n"
2457
        " *\n"
2458
        " * Permission is hereby granted, free of charge, to any person "
2459
        "obtaining a copy\n"
2460
        " * of this software and associated documentation files (the "
2461
				"\"Software\"), to deal\n"
2462
        " * in the Software without restriction, including without limitation "
2463
				"the rights\n"
2464
        " * to use, copy, modify, merge, publish, distribute, sublicense, "
2465
				"and/or sell\n"
2466
        " * copies of the Software, and to permit persons to whom the Software "
2467
				"is\n"
2468
        " * furnished to do so, subject to the following conditions:\n"
2469
        " *\n"
2470
        " * The above copyright notice and this permission notice shall be "
2471
        "included in\n"
2472
        " * all copies or substantial portions of the Software.\n"
2473
        " *\n"
2474
        " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2475
        "EXPRESS OR\n"
2476
        " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2477
        "MERCHANTABILITY,\n"
2478
        " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2479
        "SHALL THE\n"
2480
        " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2481
        "OTHER\n"
2482
        " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2483
        "ARISING FROM,\n"
2484
        " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2485
        "DEALINGS IN\n"
2486
        " * THE SOFTWARE.\n"
2487
        " *\n"
2488
        " *===-----------------------------------------------------------------"
2489
        "---"
2490
        "---===\n"
2491
        " */\n\n";
2492

2493
  OS << "#ifndef __ARM_FP16_H\n";
2494
  OS << "#define __ARM_FP16_H\n\n";
2495

2496
  OS << "#include <stdint.h>\n\n";
2497

2498
  OS << "typedef __fp16 float16_t;\n";
2499

2500
  OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2501
        "__nodebug__))\n\n";
2502

2503
  SmallVector<Intrinsic *, 128> Defs;
2504
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2505
  for (auto *R : RV)
2506
    createIntrinsic(R, Defs);
2507

2508
  for (auto *I : Defs)
2509
    I->indexBody();
2510

2511
  llvm::stable_sort(Defs, llvm::deref<std::less<>>());
2512

2513
  // Only emit a def when its requirements have been met.
2514
  // FIXME: This loop could be made faster, but it's fast enough for now.
2515
  bool MadeProgress = true;
2516
  std::string InGuard;
2517
  while (!Defs.empty() && MadeProgress) {
2518
    MadeProgress = false;
2519

2520
    for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2521
         I != Defs.end(); /*No step*/) {
2522
      bool DependenciesSatisfied = true;
2523
      for (auto *II : (*I)->getDependencies()) {
2524
        if (llvm::is_contained(Defs, II))
2525
          DependenciesSatisfied = false;
2526
      }
2527
      if (!DependenciesSatisfied) {
2528
        // Try the next one.
2529
        ++I;
2530
        continue;
2531
      }
2532

2533
      // Emit #endif/#if pair if needed.
2534
      if ((*I)->getArchGuard() != InGuard) {
2535
        if (!InGuard.empty())
2536
          OS << "#endif\n";
2537
        InGuard = (*I)->getArchGuard();
2538
        if (!InGuard.empty())
2539
          OS << "#if " << InGuard << "\n";
2540
      }
2541

2542
      // Actually generate the intrinsic code.
2543
      OS << (*I)->generate();
2544

2545
      MadeProgress = true;
2546
      I = Defs.erase(I);
2547
    }
2548
  }
2549
  assert(Defs.empty() && "Some requirements were not satisfied!");
2550
  if (!InGuard.empty())
2551
    OS << "#endif\n";
2552

2553
  OS << "\n";
2554
  OS << "#undef __ai\n\n";
2555
  OS << "#endif /* __ARM_FP16_H */\n";
2556
}
2557

2558
/// runVectorTypes - Emit the vector types header: a license banner, an #error
/// for standalone inclusion, the __ARM_NEON_TYPES_H include guard, the
/// floating-point scalar typedefs, and the vector and struct typedefs.
void NeonEmitter::runVectorTypes(raw_ostream &OS) {
  OS << "/*===---- arm_vector_types - ARM vector type "
        "------===\n"
        " *\n"
        " *\n"
        " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
        "Exceptions.\n"
        " * See https://llvm.org/LICENSE.txt for license information.\n"
        " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
        " *\n"
        " *===-----------------------------------------------------------------"
        "------===\n"
        " */\n\n";

  // Refuse standalone inclusion: one of the two umbrella headers must already
  // have defined its guard macro.
  OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n"
     << "#error \"This file should not be used standalone. Please include"
        " arm_neon.h or arm_sve.h instead\"\n\n"
     << "#endif\n";

  OS << "#ifndef __ARM_NEON_TYPES_H\n"
     << "#define __ARM_NEON_TYPES_H\n";

  // Scalar floating-point typedefs; float64_t is emitted for AArch64 only.
  OS << "typedef float float32_t;\n"
     << "typedef __fp16 float16_t;\n";

  OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"
     << "typedef double float64_t;\n"
     << "#endif\n\n";

  // Vector and struct typedefs, emitted as two separate groups of type specs.
  emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQd", OS);

  emitNeonTypeDefs("bQb", OS);
  OS << "#endif // __ARM_NEON_TYPES_H\n";
}
2589

2590
/// runBF16 - Emit arm_bf16.h: a license banner, the __ARM_BF16_H include
/// guard, the bfloat16_t typedef, and then every intrinsic, each one emitted
/// only after all intrinsics it depends on.
void NeonEmitter::runBF16(raw_ostream &OS) {
  OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
        "-----------------------------------===\n"
        " *\n"
        " *\n"
        " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
        "Exceptions.\n"
        " * See https://llvm.org/LICENSE.txt for license information.\n"
        " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
        " *\n"
        " *===-----------------------------------------------------------------"
        "------===\n"
        " */\n\n";

  OS << "#ifndef __ARM_BF16_H\n";
  OS << "#define __ARM_BF16_H\n\n";

  OS << "typedef __bf16 bfloat16_t;\n";

  OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
        "__nodebug__))\n\n";

  // Instantiate and index every intrinsic, then put them in a stable order.
  SmallVector<Intrinsic *, 128> Defs;
  for (Record *Inst : Records.getAllDerivedDefinitions("Inst"))
    createIntrinsic(Inst, Defs);

  for (Intrinsic *Def : Defs)
    Def->indexBody();

  llvm::stable_sort(Defs, llvm::deref<std::less<>>());

  // Only emit a def when its requirements have been met. Sweep over the
  // pending defs repeatedly until a whole sweep emits nothing.
  // FIXME: This loop could be made faster, but it's fast enough for now.
  std::string ActiveGuard;
  for (bool EmittedAny = true; EmittedAny && !Defs.empty();) {
    EmittedAny = false;

    auto It = Defs.begin();
    while (It != Defs.end()) {
      Intrinsic *Cur = *It;

      // A def has to wait while any of its dependencies is still pending.
      const bool MustWait =
          llvm::any_of(Cur->getDependencies(), [&](auto *Dep) {
            return llvm::is_contained(Defs, Dep);
          });
      if (MustWait) {
        // Try the next one.
        ++It;
        continue;
      }

      // Emit #endif/#if pair if the arch guard changes.
      if (Cur->getArchGuard() != ActiveGuard) {
        if (!ActiveGuard.empty())
          OS << "#endif\n";
        ActiveGuard = Cur->getArchGuard();
        if (!ActiveGuard.empty())
          OS << "#if " << ActiveGuard << "\n";
      }

      // Actually generate the intrinsic code.
      OS << Cur->generate();

      EmittedAny = true;
      It = Defs.erase(It);
    }
  }
  assert(Defs.empty() && "Some requirements were not satisfied!");
  if (!ActiveGuard.empty())
    OS << "#endif\n";

  OS << "\n";
  OS << "#undef __ai\n\n";

  OS << "#endif\n";
}
2667

2668
/// Entry point: runs NeonEmitter::run() over \p Records, writing to \p OS.
void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.run(OS);
}
2671

2672
/// Entry point: runs NeonEmitter::runFP16() over \p Records, writing to
/// \p OS.
void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runFP16(OS);
}
2675

2676
/// Entry point: runs NeonEmitter::runBF16() over \p Records, writing to
/// \p OS.
void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runBF16(OS);
}
2679

2680
/// Entry point: runs NeonEmitter::runHeader() over \p Records, writing to
/// \p OS.
void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runHeader(OS);
}
2683

2684
/// Entry point: runs NeonEmitter::runVectorTypes() over \p Records, writing
/// to \p OS.
void clang::EmitVectorTypes(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter Emitter(Records);
  Emitter.runVectorTypes(OS);
}
2687

2688
/// Entry point kept for the former NEON test generator. Both parameters are
/// unused; calling this function is treated as unreachable.
void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  llvm_unreachable("Neon test generation no longer implemented!");
}
2691

2692
Product

Resources

Company