Path: blob/main/contrib/llvm-project/clang/utils/TableGen/NeonEmitter.cpp
35230 views
//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This tablegen backend is responsible for emitting arm_neon.h, which includes9// a declaration and definition of each function specified by the ARM NEON10// compiler interface. See ARM document DUI0348B.11//12// Each NEON instruction is implemented in terms of 1 or more functions which13// are suffixed with the element type of the input vectors. Functions may be14// implemented in terms of generic vector operations such as +, *, -, etc. or15// by calling a __builtin_-prefixed function which will be handled by clang's16// CodeGen library.17//18// Additional validation code can be generated by this file when runHeader() is19// called, rather than the normal run() entry point.20//21// See also the documentation in include/clang/Basic/arm_neon.td.22//23//===----------------------------------------------------------------------===//2425#include "TableGenBackends.h"26#include "llvm/ADT/ArrayRef.h"27#include "llvm/ADT/DenseMap.h"28#include "llvm/ADT/STLExtras.h"29#include "llvm/ADT/SmallVector.h"30#include "llvm/ADT/StringExtras.h"31#include "llvm/ADT/StringRef.h"32#include "llvm/Support/Casting.h"33#include "llvm/Support/ErrorHandling.h"34#include "llvm/Support/raw_ostream.h"35#include "llvm/TableGen/Error.h"36#include "llvm/TableGen/Record.h"37#include "llvm/TableGen/SetTheory.h"38#include <algorithm>39#include <cassert>40#include <cctype>41#include <cstddef>42#include <cstdint>43#include <deque>44#include <map>45#include <optional>46#include <set>47#include <sstream>48#include <string>49#include <utility>50#include <vector>5152using namespace llvm;5354namespace {5556// While globals are generally bad, this one allows us to perform assertions57// liberally and somehow still trace them back to the def they indirectly58// came from.59static Record *CurrentRecord = nullptr;60static void assert_with_loc(bool Assertion, const std::string &Str) {61if (!Assertion) {62if (CurrentRecord)63PrintFatalError(CurrentRecord->getLoc(), Str);64else65PrintFatalError(Str);66}67}6869enum ClassKind {70ClassNone,71ClassI, // generic integer instruction, e.g., "i8" suffix72ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix73ClassW, // width-specific instruction, e.g., "8" suffix74ClassB, // bitcast arguments with enum argument to specify type75ClassL, // Logical instructions which are op instructions76// but we need to not emit any suffix for in our77// tests.78ClassNoTest // Instructions which we do not test since they are79// not TRUE instructions.80};8182/// NeonTypeFlags - Flags to identify the types for overloaded Neon83/// builtins. These must be kept in sync with the flags in84/// include/clang/Basic/TargetBuiltins.h.85namespace NeonTypeFlags {8687enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };8889enum EltType {90Int8,91Int16,92Int32,93Int64,94Poly8,95Poly16,96Poly64,97Poly128,98Float16,99Float32,100Float64,101BFloat16102};103104} // end namespace NeonTypeFlags105106class NeonEmitter;107108//===----------------------------------------------------------------------===//109// TypeSpec110//===----------------------------------------------------------------------===//111112/// A TypeSpec is just a simple wrapper around a string, but gets its own type113/// for strong typing purposes.114///115/// A TypeSpec can be used to create a type.116class TypeSpec : public std::string {117public:118static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {119std::vector<TypeSpec> Ret;120TypeSpec Acc;121for (char I : Str.str()) {122if (islower(I)) {123Acc.push_back(I);124Ret.push_back(TypeSpec(Acc));125Acc.clear();126} else {127Acc.push_back(I);128}129}130return Ret;131}132};133134//===----------------------------------------------------------------------===//135// Type136//===----------------------------------------------------------------------===//137138/// A Type. Not much more to say here.139class Type {140private:141TypeSpec TS;142143enum TypeKind {144Void,145Float,146SInt,147UInt,148Poly,149BFloat16,150};151TypeKind Kind;152bool Immediate, Constant, Pointer;153// ScalarForMangling and NoManglingQ are really not suited to live here as154// they are not related to the type. But they live in the TypeSpec (not the155// prototype), so this is really the only place to store them.156bool ScalarForMangling, NoManglingQ;157unsigned Bitwidth, ElementBitwidth, NumVectors;158159public:160Type()161: Kind(Void), Immediate(false), Constant(false),162Pointer(false), ScalarForMangling(false), NoManglingQ(false),163Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}164165Type(TypeSpec TS, StringRef CharMods)166: TS(std::move(TS)), Kind(Void), Immediate(false),167Constant(false), Pointer(false), ScalarForMangling(false),168NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {169applyModifiers(CharMods);170}171172/// Returns a type representing "void".173static Type getVoid() { return Type(); }174175bool operator==(const Type &Other) const { return str() == Other.str(); }176bool operator!=(const Type &Other) const { return !operator==(Other); }177178//179// Query functions180//181bool isScalarForMangling() const { return ScalarForMangling; }182bool noManglingQ() const { return NoManglingQ; }183184bool isPointer() const { return Pointer; }185bool isValue() const { return !isVoid() && !isPointer(); }186bool isScalar() const { return isValue() && NumVectors == 0; }187bool isVector() const { return isValue() && NumVectors > 0; }188bool isConstPointer() const { return Constant; }189bool isFloating() const { return Kind == Float; }190bool isInteger() const { return Kind == SInt || Kind == UInt; }191bool isPoly() const { return Kind == Poly; }192bool isSigned() const { return Kind == SInt; }193bool isImmediate() const { return Immediate; }194bool isFloat() const { return isFloating() && ElementBitwidth == 32; }195bool isDouble() const { return isFloating() && ElementBitwidth == 64; }196bool isHalf() const { return isFloating() && ElementBitwidth == 16; }197bool isChar() const { return ElementBitwidth == 8; }198bool isShort() const { return isInteger() && ElementBitwidth == 16; }199bool isInt() const { return isInteger() && ElementBitwidth == 32; }200bool isLong() const { return isInteger() && ElementBitwidth == 64; }201bool isVoid() const { return Kind == Void; }202bool isBFloat16() const { return Kind == BFloat16; }203unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }204unsigned getSizeInBits() const { return Bitwidth; }205unsigned getElementSizeInBits() const { return ElementBitwidth; }206unsigned getNumVectors() const { return NumVectors; }207208//209// Mutator functions210//211void makeUnsigned() {212assert(!isVoid() && "not a potentially signed type");213Kind = UInt;214}215void makeSigned() {216assert(!isVoid() && "not a potentially signed type");217Kind = SInt;218}219220void makeInteger(unsigned ElemWidth, bool Sign) {221assert(!isVoid() && "converting void to int probably not useful");222Kind = Sign ? SInt : UInt;223Immediate = false;224ElementBitwidth = ElemWidth;225}226227void makeImmediate(unsigned ElemWidth) {228Kind = SInt;229Immediate = true;230ElementBitwidth = ElemWidth;231}232233void makeScalar() {234Bitwidth = ElementBitwidth;235NumVectors = 0;236}237238void makeOneVector() {239assert(isVector());240NumVectors = 1;241}242243void make32BitElement() {244assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");245ElementBitwidth = 32;246}247248void doubleLanes() {249assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");250Bitwidth = 128;251}252253void halveLanes() {254assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");255Bitwidth = 64;256}257258/// Return the C string representation of a type, which is the typename259/// defined in stdint.h or arm_neon.h.260std::string str() const;261262/// Return the string representation of a type, which is an encoded263/// string for passing to the BUILTIN() macro in Builtins.def.264std::string builtin_str() const;265266/// Return the value in NeonTypeFlags for this type.267unsigned getNeonEnum() const;268269/// Parse a type from a stdint.h or arm_neon.h typedef name,270/// for example uint32x2_t or int64_t.271static Type fromTypedefName(StringRef Name);272273private:274/// Creates the type based on the typespec string in TS.275/// Sets "Quad" to true if the "Q" or "H" modifiers were276/// seen. This is needed by applyModifier as some modifiers277/// only take effect if the type size was changed by "Q" or "H".278void applyTypespec(bool &Quad);279/// Applies prototype modifiers to the type.280void applyModifiers(StringRef Mods);281};282283//===----------------------------------------------------------------------===//284// Variable285//===----------------------------------------------------------------------===//286287/// A variable is a simple class that just has a type and a name.288class Variable {289Type T;290std::string N;291292public:293Variable() : T(Type::getVoid()) {}294Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}295296Type getType() const { return T; }297std::string getName() const { return "__" + N; }298};299300//===----------------------------------------------------------------------===//301// Intrinsic302//===----------------------------------------------------------------------===//303304/// The main grunt class. This represents an instantiation of an intrinsic with305/// a particular typespec and prototype.306class Intrinsic {307/// The Record this intrinsic was created from.308Record *R;309/// The unmangled name.310std::string Name;311/// The input and output typespecs. InTS == OutTS except when312/// CartesianProductWith is non-empty - this is the case for vreinterpret.313TypeSpec OutTS, InTS;314/// The base class kind. Most intrinsics use ClassS, which has full type315/// info for integers (s32/u32). Some use ClassI, which doesn't care about316/// signedness (i32), while some (ClassB) have no type at all, only a width317/// (32).318ClassKind CK;319/// The list of DAGs for the body. May be empty, in which case we should320/// emit a builtin call.321ListInit *Body;322/// The architectural ifdef guard.323std::string ArchGuard;324/// The architectural target() guard.325std::string TargetGuard;326/// Set if the Unavailable bit is 1. This means we don't generate a body,327/// just an "unavailable" attribute on a declaration.328bool IsUnavailable;329/// Is this intrinsic safe for big-endian? or does it need its arguments330/// reversing?331bool BigEndianSafe;332333/// The types of return value [0] and parameters [1..].334std::vector<Type> Types;335/// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.336int PolymorphicKeyType;337/// The local variables defined.338std::map<std::string, Variable> Variables;339/// NeededEarly - set if any other intrinsic depends on this intrinsic.340bool NeededEarly;341/// UseMacro - set if we should implement using a macro or unset for a342/// function.343bool UseMacro;344/// The set of intrinsics that this intrinsic uses/requires.345std::set<Intrinsic *> Dependencies;346/// The "base type", which is Type('d', OutTS). InBaseType is only347/// different if CartesianProductWith is non-empty (for vreinterpret).348Type BaseType, InBaseType;349/// The return variable.350Variable RetVar;351/// A postfix to apply to every variable. Defaults to "".352std::string VariablePostfix;353354NeonEmitter &Emitter;355std::stringstream OS;356357bool isBigEndianSafe() const {358if (BigEndianSafe)359return true;360361for (const auto &T : Types){362if (T.isVector() && T.getNumElements() > 1)363return false;364}365return true;366}367368public:369Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,370TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,371StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe)372: R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),373ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),374BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),375UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),376Emitter(Emitter) {377// Modify the TypeSpec per-argument to get a concrete Type, and create378// known variables for each.379// Types[0] is the return value.380unsigned Pos = 0;381Types.emplace_back(OutTS, getNextModifiers(Proto, Pos));382StringRef Mods = getNextModifiers(Proto, Pos);383while (!Mods.empty()) {384Types.emplace_back(InTS, Mods);385if (Mods.contains('!'))386PolymorphicKeyType = Types.size() - 1;387388Mods = getNextModifiers(Proto, Pos);389}390391for (const auto &Type : Types) {392// If this builtin takes an immediate argument, we need to #define it rather393// than use a standard declaration, so that SemaChecking can range check394// the immediate passed by the user.395396// Pointer arguments need to use macros to avoid hiding aligned attributes397// from the pointer type.398399// It is not permitted to pass or return an __fp16 by value, so intrinsics400// taking a scalar float16_t must be implemented as macros.401if (Type.isImmediate() || Type.isPointer() ||402(Type.isScalar() && Type.isHalf()))403UseMacro = true;404}405}406407/// Get the Record that this intrinsic is based off.408Record *getRecord() const { return R; }409/// Get the set of Intrinsics that this intrinsic calls.410/// this is the set of immediate dependencies, NOT the411/// transitive closure.412const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }413/// Get the architectural guard string (#ifdef).414std::string getArchGuard() const { return ArchGuard; }415std::string getTargetGuard() const { return TargetGuard; }416/// Get the non-mangled name.417std::string getName() const { return Name; }418419/// Return true if the intrinsic takes an immediate operand.420bool hasImmediate() const {421return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });422}423424/// Return the parameter index of the immediate operand.425unsigned getImmediateIdx() const {426for (unsigned Idx = 0; Idx < Types.size(); ++Idx)427if (Types[Idx].isImmediate())428return Idx - 1;429llvm_unreachable("Intrinsic has no immediate");430}431432433unsigned getNumParams() const { return Types.size() - 1; }434Type getReturnType() const { return Types[0]; }435Type getParamType(unsigned I) const { return Types[I + 1]; }436Type getBaseType() const { return BaseType; }437Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }438439/// Return true if the prototype has a scalar argument.440bool protoHasScalar() const;441442/// Return the index that parameter PIndex will sit at443/// in a generated function call. This is often just PIndex,444/// but may not be as things such as multiple-vector operands445/// and sret parameters need to be taken into account.446unsigned getGeneratedParamIdx(unsigned PIndex) {447unsigned Idx = 0;448if (getReturnType().getNumVectors() > 1)449// Multiple vectors are passed as sret.450++Idx;451452for (unsigned I = 0; I < PIndex; ++I)453Idx += std::max(1U, getParamType(I).getNumVectors());454455return Idx;456}457458bool hasBody() const { return Body && !Body->getValues().empty(); }459460void setNeededEarly() { NeededEarly = true; }461462bool operator<(const Intrinsic &Other) const {463// Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name)464if (ArchGuard != Other.ArchGuard)465return ArchGuard < Other.ArchGuard;466if (TargetGuard != Other.TargetGuard)467return TargetGuard < Other.TargetGuard;468return Name < Other.Name;469}470471ClassKind getClassKind(bool UseClassBIfScalar = false) {472if (UseClassBIfScalar && !protoHasScalar())473return ClassB;474return CK;475}476477/// Return the name, mangled with type information.478/// If ForceClassS is true, use ClassS (u32/s32) instead479/// of the intrinsic's own type class.480std::string getMangledName(bool ForceClassS = false) const;481/// Return the type code for a builtin function call.482std::string getInstTypeCode(Type T, ClassKind CK) const;483/// Return the type string for a BUILTIN() macro in Builtins.def.484std::string getBuiltinTypeStr();485486/// Generate the intrinsic, returning code.487std::string generate();488/// Perform type checking and populate the dependency graph, but489/// don't generate code yet.490void indexBody();491492private:493StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;494495std::string mangleName(std::string Name, ClassKind CK) const;496497void initVariables();498std::string replaceParamsIn(std::string S);499500void emitBodyAsBuiltinCall();501502void generateImpl(bool ReverseArguments,503StringRef NamePrefix, StringRef CallPrefix);504void emitReturn();505void emitBody(StringRef CallPrefix);506void emitShadowedArgs();507void emitArgumentReversal();508void emitReturnVarDecl();509void emitReturnReversal();510void emitReverseVariable(Variable &Dest, Variable &Src);511void emitNewLine();512void emitClosingBrace();513void emitOpeningBrace();514void emitPrototype(StringRef NamePrefix);515516class DagEmitter {517Intrinsic &Intr;518StringRef CallPrefix;519520public:521DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :522Intr(Intr), CallPrefix(CallPrefix) {523}524std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);525std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);526std::pair<Type, std::string> emitDagSplat(DagInit *DI);527std::pair<Type, std::string> emitDagDup(DagInit *DI);528std::pair<Type, std::string> emitDagDupTyped(DagInit *DI);529std::pair<Type, std::string> emitDagShuffle(DagInit *DI);530std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);531std::pair<Type, std::string> emitDagCall(DagInit *DI,532bool MatchMangledName);533std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);534std::pair<Type, std::string> emitDagLiteral(DagInit *DI);535std::pair<Type, std::string> emitDagOp(DagInit *DI);536std::pair<Type, std::string> emitDag(DagInit *DI);537};538};539540//===----------------------------------------------------------------------===//541// NeonEmitter542//===----------------------------------------------------------------------===//543544class NeonEmitter {545RecordKeeper &Records;546DenseMap<Record *, ClassKind> ClassMap;547std::map<std::string, std::deque<Intrinsic>> IntrinsicMap;548unsigned UniqueNumber;549550void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);551void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);552void genStreamingSVECompatibleList(raw_ostream &OS,553SmallVectorImpl<Intrinsic *> &Defs);554void genOverloadTypeCheckCode(raw_ostream &OS,555SmallVectorImpl<Intrinsic *> &Defs);556void genIntrinsicRangeCheckCode(raw_ostream &OS,557SmallVectorImpl<Intrinsic *> &Defs);558559public:560/// Called by Intrinsic - this attempts to get an intrinsic that takes561/// the given types as arguments.562Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,563std::optional<std::string> MangledName);564565/// Called by Intrinsic - returns a globally-unique number.566unsigned getUniqueNumber() { return UniqueNumber++; }567568NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {569Record *SI = R.getClass("SInst");570Record *II = R.getClass("IInst");571Record *WI = R.getClass("WInst");572Record *SOpI = R.getClass("SOpInst");573Record *IOpI = R.getClass("IOpInst");574Record *WOpI = R.getClass("WOpInst");575Record *LOpI = R.getClass("LOpInst");576Record *NoTestOpI = R.getClass("NoTestOpInst");577578ClassMap[SI] = ClassS;579ClassMap[II] = ClassI;580ClassMap[WI] = ClassW;581ClassMap[SOpI] = ClassS;582ClassMap[IOpI] = ClassI;583ClassMap[WOpI] = ClassW;584ClassMap[LOpI] = ClassL;585ClassMap[NoTestOpI] = ClassNoTest;586}587588// Emit arm_neon.h.inc589void run(raw_ostream &o);590591// Emit arm_fp16.h.inc592void runFP16(raw_ostream &o);593594// Emit arm_bf16.h.inc595void runBF16(raw_ostream &o);596597void runVectorTypes(raw_ostream &o);598599// Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and600// arm_bf16.h601void runHeader(raw_ostream &o);602};603604} // end anonymous namespace605606//===----------------------------------------------------------------------===//607// Type implementation608//===----------------------------------------------------------------------===//609610std::string Type::str() const {611if (isVoid())612return "void";613std::string S;614615if (isInteger() && !isSigned())616S += "u";617618if (isPoly())619S += "poly";620else if (isFloating())621S += "float";622else if (isBFloat16())623S += "bfloat";624else625S += "int";626627S += utostr(ElementBitwidth);628if (isVector())629S += "x" + utostr(getNumElements());630if (NumVectors > 1)631S += "x" + utostr(NumVectors);632S += "_t";633634if (Constant)635S += " const";636if (Pointer)637S += " *";638639return S;640}641642std::string Type::builtin_str() const {643std::string S;644if (isVoid())645return "v";646647if (isPointer()) {648// All pointers are void pointers.649S = "v";650if (isConstPointer())651S += "C";652S += "*";653return S;654} else if (isInteger())655switch (ElementBitwidth) {656case 8: S += "c"; break;657case 16: S += "s"; break;658case 32: S += "i"; break;659case 64: S += "Wi"; break;660case 128: S += "LLLi"; break;661default: llvm_unreachable("Unhandled case!");662}663else if (isBFloat16()) {664assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");665S += "y";666} else667switch (ElementBitwidth) {668case 16: S += "h"; break;669case 32: S += "f"; break;670case 64: S += "d"; break;671default: llvm_unreachable("Unhandled case!");672}673674// FIXME: NECESSARY???????????????????????????????????????????????????????????????????????675if (isChar() && !isPointer() && isSigned())676// Make chars explicitly signed.677S = "S" + S;678else if (isInteger() && !isSigned())679S = "U" + S;680681// Constant indices are "int", but have the "constant expression" modifier.682if (isImmediate()) {683assert(isInteger() && isSigned());684S = "I" + S;685}686687if (isScalar())688return S;689690std::string Ret;691for (unsigned I = 0; I < NumVectors; ++I)692Ret += "V" + utostr(getNumElements()) + S;693694return Ret;695}696697unsigned Type::getNeonEnum() const {698unsigned Addend;699switch (ElementBitwidth) {700case 8: Addend = 0; break;701case 16: Addend = 1; break;702case 32: Addend = 2; break;703case 64: Addend = 3; break;704case 128: Addend = 4; break;705default: llvm_unreachable("Unhandled element bitwidth!");706}707708unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;709if (isPoly()) {710// Adjustment needed because Poly32 doesn't exist.711if (Addend >= 2)712--Addend;713Base = (unsigned)NeonTypeFlags::Poly8 + Addend;714}715if (isFloating()) {716assert(Addend != 0 && "Float8 doesn't exist!");717Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);718}719720if (isBFloat16()) {721assert(Addend == 1 && "BFloat16 is only 16 bit");722Base = (unsigned)NeonTypeFlags::BFloat16;723}724725if (Bitwidth == 128)726Base |= (unsigned)NeonTypeFlags::QuadFlag;727if (isInteger() && !isSigned())728Base |= (unsigned)NeonTypeFlags::UnsignedFlag;729730return Base;731}732733Type Type::fromTypedefName(StringRef Name) {734Type T;735T.Kind = SInt;736737if (Name.consume_front("u"))738T.Kind = UInt;739740if (Name.consume_front("float")) {741T.Kind = Float;742} else if (Name.consume_front("poly")) {743T.Kind = Poly;744} else if (Name.consume_front("bfloat")) {745T.Kind = BFloat16;746} else {747assert(Name.starts_with("int"));748Name = Name.drop_front(3);749}750751unsigned I = 0;752for (I = 0; I < Name.size(); ++I) {753if (!isdigit(Name[I]))754break;755}756Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);757Name = Name.drop_front(I);758759T.Bitwidth = T.ElementBitwidth;760T.NumVectors = 1;761762if (Name.consume_front("x")) {763unsigned I = 0;764for (I = 0; I < Name.size(); ++I) {765if (!isdigit(Name[I]))766break;767}768unsigned NumLanes;769Name.substr(0, I).getAsInteger(10, NumLanes);770Name = Name.drop_front(I);771T.Bitwidth = T.ElementBitwidth * NumLanes;772} else {773// Was scalar.774T.NumVectors = 0;775}776if (Name.consume_front("x")) {777unsigned I = 0;778for (I = 0; I < Name.size(); ++I) {779if (!isdigit(Name[I]))780break;781}782Name.substr(0, I).getAsInteger(10, T.NumVectors);783Name = Name.drop_front(I);784}785786assert(Name.starts_with("_t") && "Malformed typedef!");787return T;788}789790void Type::applyTypespec(bool &Quad) {791std::string S = TS;792ScalarForMangling = false;793Kind = SInt;794ElementBitwidth = ~0U;795NumVectors = 1;796797for (char I : S) {798switch (I) {799case 'S':800ScalarForMangling = true;801break;802case 'H':803NoManglingQ = true;804Quad = true;805break;806case 'Q':807Quad = true;808break;809case 'P':810Kind = Poly;811break;812case 'U':813Kind = UInt;814break;815case 'c':816ElementBitwidth = 8;817break;818case 'h':819Kind = Float;820[[fallthrough]];821case 's':822ElementBitwidth = 16;823break;824case 'f':825Kind = Float;826[[fallthrough]];827case 'i':828ElementBitwidth = 32;829break;830case 'd':831Kind = Float;832[[fallthrough]];833case 'l':834ElementBitwidth = 64;835break;836case 'k':837ElementBitwidth = 128;838// Poly doesn't have a 128x1 type.839if (isPoly())840NumVectors = 0;841break;842case 'b':843Kind = BFloat16;844ElementBitwidth = 16;845break;846default:847llvm_unreachable("Unhandled type code!");848}849}850assert(ElementBitwidth != ~0U && "Bad element bitwidth!");851852Bitwidth = Quad ? 128 : 64;853}854855void Type::applyModifiers(StringRef Mods) {856bool AppliedQuad = false;857applyTypespec(AppliedQuad);858859for (char Mod : Mods) {860switch (Mod) {861case '.':862break;863case 'v':864Kind = Void;865break;866case 'S':867Kind = SInt;868break;869case 'U':870Kind = UInt;871break;872case 'B':873Kind = BFloat16;874ElementBitwidth = 16;875break;876case 'F':877Kind = Float;878break;879case 'P':880Kind = Poly;881break;882case '>':883assert(ElementBitwidth < 128);884ElementBitwidth *= 2;885break;886case '<':887assert(ElementBitwidth > 8);888ElementBitwidth /= 2;889break;890case '1':891NumVectors = 0;892break;893case '2':894NumVectors = 2;895break;896case '3':897NumVectors = 3;898break;899case '4':900NumVectors = 4;901break;902case '*':903Pointer = true;904break;905case 'c':906Constant = true;907break;908case 'Q':909Bitwidth = 128;910break;911case 'q':912Bitwidth = 64;913break;914case 'I':915Kind = SInt;916ElementBitwidth = Bitwidth = 32;917NumVectors = 0;918Immediate = true;919break;920case 'p':921if (isPoly())922Kind = UInt;923break;924case '!':925// Key type, handled elsewhere.926break;927default:928llvm_unreachable("Unhandled character!");929}930}931}932933//===----------------------------------------------------------------------===//934// Intrinsic implementation935//===----------------------------------------------------------------------===//936937StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {938if (Proto.size() == Pos)939return StringRef();940else if (Proto[Pos] != '(')941return Proto.substr(Pos++, 1);942943size_t Start = Pos + 1;944size_t End = Proto.find(')', Start);945assert_with_loc(End != StringRef::npos, "unmatched modifier group paren");946Pos = End + 1;947return Proto.slice(Start, End);948}949950std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {951char typeCode = '\0';952bool printNumber = true;953954if (CK == ClassB && TargetGuard == "neon")955return "";956957if (T.isBFloat16())958return "bf16";959960if (T.isPoly())961typeCode = 'p';962else if (T.isInteger())963typeCode = T.isSigned() ? 's' : 'u';964else965typeCode = 'f';966967if (CK == ClassI) {968switch (typeCode) {969default:970break;971case 's':972case 'u':973case 'p':974typeCode = 'i';975break;976}977}978if (CK == ClassB && TargetGuard == "neon") {979typeCode = '\0';980}981982std::string S;983if (typeCode != '\0')984S.push_back(typeCode);985if (printNumber)986S += utostr(T.getElementSizeInBits());987988return S;989}990991std::string Intrinsic::getBuiltinTypeStr() {992ClassKind LocalCK = getClassKind(true);993std::string S;994995Type RetT = getReturnType();996if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&997!RetT.isFloating() && !RetT.isBFloat16())998RetT.makeInteger(RetT.getElementSizeInBits(), false);9991000// Since the return value must be one type, return a vector type of the1001// appropriate width which we will bitcast. An exception is made for1002// returning structs of 2, 3, or 4 vectors which are returned in a sret-like1003// fashion, storing them to a pointer arg.1004if (RetT.getNumVectors() > 1) {1005S += "vv*"; // void result with void* first argument1006} else {1007if (RetT.isPoly())1008RetT.makeInteger(RetT.getElementSizeInBits(), false);1009if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())1010RetT.makeSigned();10111012if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())1013// Cast to vector of 8-bit elements.1014RetT.makeInteger(8, true);10151016S += RetT.builtin_str();1017}10181019for (unsigned I = 0; I < getNumParams(); ++I) {1020Type T = getParamType(I);1021if (T.isPoly())1022T.makeInteger(T.getElementSizeInBits(), false);10231024if (LocalCK == ClassB && !T.isScalar())1025T.makeInteger(8, true);1026// Halves always get converted to 8-bit elements.1027if (T.isHalf() && T.isVector() && !T.isScalarForMangling())1028T.makeInteger(8, true);10291030if (LocalCK == ClassI && T.isInteger())1031T.makeSigned();10321033if (hasImmediate() && getImmediateIdx() == I)1034T.makeImmediate(32);10351036S += T.builtin_str();1037}10381039// Extra constant integer to hold type class enum for this function, e.g. s81040if (LocalCK == ClassB)1041S += "i";10421043return S;1044}10451046std::string Intrinsic::getMangledName(bool ForceClassS) const {1047// Check if the prototype has a scalar operand with the type of the vector1048// elements. If not, bitcasting the args will take care of arg checking.1049// The actual signedness etc. will be taken care of with special enums.1050ClassKind LocalCK = CK;1051if (!protoHasScalar())1052LocalCK = ClassB;10531054return mangleName(Name, ForceClassS ? ClassS : LocalCK);1055}10561057std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {1058std::string typeCode = getInstTypeCode(BaseType, LocalCK);1059std::string S = Name;10601061if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||1062Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||1063Name == "vcvt_f32_bf16")1064return Name;10651066if (!typeCode.empty()) {1067// If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.1068if (Name.size() >= 3 && isdigit(Name.back()) &&1069Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')1070S.insert(S.length() - 3, "_" + typeCode);1071else1072S += "_" + typeCode;1073}10741075if (BaseType != InBaseType) {1076// A reinterpret - out the input base type at the end.1077S += "_" + getInstTypeCode(InBaseType, LocalCK);1078}10791080if (LocalCK == ClassB && TargetGuard == "neon")1081S += "_v";10821083// Insert a 'q' before the first '_' character so that it ends up before1084// _lane or _n on vector-scalar operations.1085if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {1086size_t Pos = S.find('_');1087S.insert(Pos, "q");1088}10891090char Suffix = '\0';1091if (BaseType.isScalarForMangling()) {1092switch (BaseType.getElementSizeInBits()) {1093case 8: Suffix = 'b'; break;1094case 16: Suffix = 'h'; break;1095case 32: Suffix = 's'; break;1096case 64: Suffix = 'd'; break;1097default: llvm_unreachable("Bad suffix!");1098}1099}1100if (Suffix != '\0') {1101size_t Pos = S.find('_');1102S.insert(Pos, &Suffix, 1);1103}11041105return S;1106}11071108std::string Intrinsic::replaceParamsIn(std::string S) {1109while (S.find('$') != std::string::npos) {1110size_t Pos = S.find('$');1111size_t End = Pos + 1;1112while (isalpha(S[End]))1113++End;11141115std::string VarName = S.substr(Pos + 1, End - Pos - 1);1116assert_with_loc(Variables.find(VarName) != Variables.end(),1117"Variable not defined!");1118S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());1119}11201121return S;1122}11231124void Intrinsic::initVariables() {1125Variables.clear();11261127// Modify the TypeSpec per-argument to get a concrete Type, and create1128// known variables for each.1129for (unsigned I = 1; I < Types.size(); ++I) {1130char NameC = '0' + (I - 1);1131std::string Name = "p";1132Name.push_back(NameC);11331134Variables[Name] = Variable(Types[I], Name + VariablePostfix);1135}1136RetVar = Variable(Types[0], "ret" + VariablePostfix);1137}11381139void Intrinsic::emitPrototype(StringRef NamePrefix) {1140if (UseMacro) {1141OS << "#define ";1142} else {1143OS << "__ai ";1144if (TargetGuard != "")1145OS << "__attribute__((target(\"" << TargetGuard << "\"))) ";1146OS << Types[0].str() << " ";1147}11481149OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";11501151for (unsigned I = 0; I < getNumParams(); ++I) {1152if (I != 0)1153OS << ", ";11541155char NameC = '0' + I;1156std::string Name = "p";1157Name.push_back(NameC);1158assert(Variables.find(Name) != Variables.end());1159Variable &V = Variables[Name];11601161if (!UseMacro)1162OS << V.getType().str() << " ";1163OS << V.getName();1164}11651166OS << ")";1167}11681169void Intrinsic::emitOpeningBrace() {1170if (UseMacro)1171OS << " __extension__ ({";1172else1173OS << " {";1174emitNewLine();1175}11761177void Intrinsic::emitClosingBrace() {1178if (UseMacro)1179OS << "})";1180else1181OS << "}";1182}11831184void Intrinsic::emitNewLine() {1185if (UseMacro)1186OS << " \\\n";1187else1188OS << "\n";1189}11901191void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {1192if (Dest.getType().getNumVectors() > 1) {1193emitNewLine();11941195for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {1196OS << " " << Dest.getName() << ".val[" << K << "] = "1197<< "__builtin_shufflevector("1198<< Src.getName() << ".val[" << K << "], "1199<< Src.getName() << ".val[" << K << "]";1200for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)1201OS << ", " << J;1202OS << ");";1203emitNewLine();1204}1205} else {1206OS << " " << Dest.getName()1207<< " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName();1208for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)1209OS << ", " << J;1210OS << ");";1211emitNewLine();1212}1213}12141215void Intrinsic::emitArgumentReversal() {1216if (isBigEndianSafe())1217return;12181219// Reverse all vector arguments.1220for (unsigned I = 0; I < getNumParams(); ++I) {1221std::string Name = "p" + utostr(I);1222std::string NewName = "rev" + utostr(I);12231224Variable &V = Variables[Name];1225Variable NewV(V.getType(), NewName + VariablePostfix);12261227if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)1228continue;12291230OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";1231emitReverseVariable(NewV, V);1232V = NewV;1233}1234}12351236void Intrinsic::emitReturnVarDecl() {1237assert(RetVar.getType() == Types[0]);1238// Create a return variable, if we're not void.1239if (!RetVar.getType().isVoid()) {1240OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";1241emitNewLine();1242}1243}12441245void Intrinsic::emitReturnReversal() {1246if (isBigEndianSafe())1247return;1248if (!getReturnType().isVector() || getReturnType().isVoid() ||1249getReturnType().getNumElements() == 1)1250return;1251emitReverseVariable(RetVar, RetVar);1252}12531254void Intrinsic::emitShadowedArgs() {1255// Macro arguments are not type-checked like inline function arguments,1256// so assign them to local temporaries to get the right type checking.1257if (!UseMacro)1258return;12591260for (unsigned I = 0; I < getNumParams(); ++I) {1261// Do not create a temporary for an immediate argument.1262// That would defeat the whole point of using a macro!1263if (getParamType(I).isImmediate())1264continue;1265// Do not create a temporary for pointer arguments. The input1266// pointer may have an alignment hint.1267if (getParamType(I).isPointer())1268continue;12691270std::string Name = "p" + utostr(I);12711272assert(Variables.find(Name) != Variables.end());1273Variable &V = Variables[Name];12741275std::string NewName = "s" + utostr(I);1276Variable V2(V.getType(), NewName + VariablePostfix);12771278OS << " " << V2.getType().str() << " " << V2.getName() << " = "1279<< V.getName() << ";";1280emitNewLine();12811282V = V2;1283}1284}12851286bool Intrinsic::protoHasScalar() const {1287return llvm::any_of(1288Types, [](const Type &T) { return T.isScalar() && !T.isImmediate(); });1289}12901291void Intrinsic::emitBodyAsBuiltinCall() {1292std::string S;12931294// If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit1295// sret-like argument.1296bool SRet = getReturnType().getNumVectors() >= 2;12971298StringRef N = Name;1299ClassKind LocalCK = CK;1300if (!protoHasScalar())1301LocalCK = ClassB;13021303if (!getReturnType().isVoid() && !SRet)1304S += "(" + RetVar.getType().str() + ") ";13051306S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "(";13071308if (SRet)1309S += "&" + RetVar.getName() + ", ";13101311for (unsigned I = 0; I < getNumParams(); ++I) {1312Variable &V = Variables["p" + utostr(I)];1313Type T = V.getType();13141315// Handle multiple-vector values specially, emitting each subvector as an1316// argument to the builtin.1317if (T.getNumVectors() > 1) {1318// Check if an explicit cast is needed.1319std::string Cast;1320if (LocalCK == ClassB) {1321Type T2 = T;1322T2.makeOneVector();1323T2.makeInteger(8, /*Sign=*/true);1324Cast = "(" + T2.str() + ")";1325}13261327for (unsigned J = 0; J < T.getNumVectors(); ++J)1328S += Cast + V.getName() + ".val[" + utostr(J) + "], ";1329continue;1330}13311332std::string Arg = V.getName();1333Type CastToType = T;13341335// Check if an explicit cast is needed.1336if (CastToType.isVector() &&1337(LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) {1338CastToType.makeInteger(8, true);1339Arg = "(" + CastToType.str() + ")" + Arg;1340} else if (CastToType.isVector() && LocalCK == ClassI) {1341if (CastToType.isInteger())1342CastToType.makeSigned();1343Arg = "(" + CastToType.str() + ")" + Arg;1344}13451346S += Arg + ", ";1347}13481349// Extra constant integer to hold type class enum for this function, e.g. s81350if (getClassKind(true) == ClassB) {1351S += utostr(getPolymorphicKeyType().getNeonEnum());1352} else {1353// Remove extraneous ", ".1354S.pop_back();1355S.pop_back();1356}1357S += ");";13581359std::string RetExpr;1360if (!SRet && !RetVar.getType().isVoid())1361RetExpr = RetVar.getName() + " = ";13621363OS << " " << RetExpr << S;1364emitNewLine();1365}13661367void Intrinsic::emitBody(StringRef CallPrefix) {1368std::vector<std::string> Lines;13691370if (!Body || Body->getValues().empty()) {1371// Nothing specific to output - must output a builtin.1372emitBodyAsBuiltinCall();1373return;1374}13751376// We have a list of "things to output". The last should be returned.1377for (auto *I : Body->getValues()) {1378if (StringInit *SI = dyn_cast<StringInit>(I)) {1379Lines.push_back(replaceParamsIn(SI->getAsString()));1380} else if (DagInit *DI = dyn_cast<DagInit>(I)) {1381DagEmitter DE(*this, CallPrefix);1382Lines.push_back(DE.emitDag(DI).second + ";");1383}1384}13851386assert(!Lines.empty() && "Empty def?");1387if (!RetVar.getType().isVoid())1388Lines.back().insert(0, RetVar.getName() + " = ");13891390for (auto &L : Lines) {1391OS << " " << L;1392emitNewLine();1393}1394}13951396void Intrinsic::emitReturn() {1397if (RetVar.getType().isVoid())1398return;1399if (UseMacro)1400OS << " " << RetVar.getName() << ";";1401else1402OS << " return " << RetVar.getName() << ";";1403emitNewLine();1404}14051406std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {1407// At this point we should only be seeing a def.1408DefInit *DefI = cast<DefInit>(DI->getOperator());1409std::string Op = DefI->getAsString();14101411if (Op == "cast" || Op == "bitcast")1412return emitDagCast(DI, Op == "bitcast");1413if (Op == "shuffle")1414return emitDagShuffle(DI);1415if (Op == "dup")1416return emitDagDup(DI);1417if (Op == "dup_typed")1418return emitDagDupTyped(DI);1419if (Op == "splat")1420return emitDagSplat(DI);1421if (Op == "save_temp")1422return emitDagSaveTemp(DI);1423if (Op == "op")1424return emitDagOp(DI);1425if (Op == "call" || Op == "call_mangled")1426return emitDagCall(DI, Op == "call_mangled");1427if (Op == "name_replace")1428return emitDagNameReplace(DI);1429if (Op == "literal")1430return emitDagLiteral(DI);1431assert_with_loc(false, "Unknown operation!");1432return std::make_pair(Type::getVoid(), "");1433}14341435std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {1436std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();1437if (DI->getNumArgs() == 2) {1438// Unary op.1439std::pair<Type, std::string> R =1440emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));1441return std::make_pair(R.first, Op + R.second);1442} else {1443assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");1444std::pair<Type, std::string> R1 =1445emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));1446std::pair<Type, std::string> R2 =1447emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2)));1448assert_with_loc(R1.first == R2.first, "Argument type mismatch!");1449return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);1450}1451}14521453std::pair<Type, std::string>1454Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {1455std::vector<Type> Types;1456std::vector<std::string> Values;1457for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {1458std::pair<Type, std::string> R =1459emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1)));1460Types.push_back(R.first);1461Values.push_back(R.second);1462}14631464// Look up the called intrinsic.1465std::string N;1466if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))1467N = SI->getAsUnquotedString();1468else1469N = emitDagArg(DI->getArg(0), "").second;1470std::optional<std::string> MangledName;1471if (MatchMangledName) {1472if (Intr.getRecord()->getValueAsBit("isLaneQ"))1473N += "q";1474MangledName = Intr.mangleName(N, ClassS);1475}1476Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName);14771478// Make sure the callee is known as an early def.1479Callee.setNeededEarly();1480Intr.Dependencies.insert(&Callee);14811482// Now create the call itself.1483std::string S;1484if (!Callee.isBigEndianSafe())1485S += CallPrefix.str();1486S += Callee.getMangledName(true) + "(";1487for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {1488if (I != 0)1489S += ", ";1490S += Values[I];1491}1492S += ")";14931494return std::make_pair(Callee.getReturnType(), S);1495}14961497std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,1498bool IsBitCast){1499// (cast MOD* VAL) -> cast VAL to type given by MOD.1500std::pair<Type, std::string> R =1501emitDagArg(DI->getArg(DI->getNumArgs() - 1),1502std::string(DI->getArgNameStr(DI->getNumArgs() - 1)));1503Type castToType = R.first;1504for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {15051506// MOD can take several forms:1507// 1. $X - take the type of parameter / variable X.1508// 2. The value "R" - take the type of the return type.1509// 3. a type string1510// 4. The value "U" or "S" to switch the signedness.1511// 5. The value "H" or "D" to half or double the bitwidth.1512// 6. The value "8" to convert to 8-bit (signed) integer lanes.1513if (!DI->getArgNameStr(ArgIdx).empty()) {1514assert_with_loc(Intr.Variables.find(std::string(1515DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(),1516"Variable not found");1517castToType =1518Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType();1519} else {1520StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));1521assert_with_loc(SI, "Expected string type or $Name for cast type");15221523if (SI->getAsUnquotedString() == "R") {1524castToType = Intr.getReturnType();1525} else if (SI->getAsUnquotedString() == "U") {1526castToType.makeUnsigned();1527} else if (SI->getAsUnquotedString() == "S") {1528castToType.makeSigned();1529} else if (SI->getAsUnquotedString() == "H") {1530castToType.halveLanes();1531} else if (SI->getAsUnquotedString() == "D") {1532castToType.doubleLanes();1533} else if (SI->getAsUnquotedString() == "8") {1534castToType.makeInteger(8, true);1535} else if (SI->getAsUnquotedString() == "32") {1536castToType.make32BitElement();1537} else {1538castToType = Type::fromTypedefName(SI->getAsUnquotedString());1539assert_with_loc(!castToType.isVoid(), "Unknown typedef");1540}1541}1542}15431544std::string S;1545if (IsBitCast) {1546// Emit a reinterpret cast. The second operand must be an lvalue, so create1547// a temporary.1548std::string N = "reint";1549unsigned I = 0;1550while (Intr.Variables.find(N) != Intr.Variables.end())1551N = "reint" + utostr(++I);1552Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);15531554Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "1555<< R.second << ";";1556Intr.emitNewLine();15571558S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";1559} else {1560// Emit a normal (static) cast.1561S = "(" + castToType.str() + ")(" + R.second + ")";1562}15631564return std::make_pair(castToType, S);1565}15661567std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){1568// See the documentation in arm_neon.td for a description of these operators.1569class LowHalf : public SetTheory::Operator {1570public:1571void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,1572ArrayRef<SMLoc> Loc) override {1573SetTheory::RecSet Elts2;1574ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);1575Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));1576}1577};15781579class HighHalf : public SetTheory::Operator {1580public:1581void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,1582ArrayRef<SMLoc> Loc) override {1583SetTheory::RecSet Elts2;1584ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);1585Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());1586}1587};15881589class Rev : public SetTheory::Operator {1590unsigned ElementSize;15911592public:1593Rev(unsigned ElementSize) : ElementSize(ElementSize) {}15941595void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,1596ArrayRef<SMLoc> Loc) override {1597SetTheory::RecSet Elts2;1598ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);15991600int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();1601VectorSize /= ElementSize;16021603std::vector<Record *> Revved;1604for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {1605for (int LI = VectorSize - 1; LI >= 0; --LI) {1606Revved.push_back(Elts2[VI + LI]);1607}1608}16091610Elts.insert(Revved.begin(), Revved.end());1611}1612};16131614class MaskExpander : public SetTheory::Expander {1615unsigned N;16161617public:1618MaskExpander(unsigned N) : N(N) {}16191620void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override {1621unsigned Addend = 0;1622if (R->getName() == "mask0")1623Addend = 0;1624else if (R->getName() == "mask1")1625Addend = N;1626else1627return;1628for (unsigned I = 0; I < N; ++I)1629Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));1630}1631};16321633// (shuffle arg1, arg2, sequence)1634std::pair<Type, std::string> Arg1 =1635emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));1636std::pair<Type, std::string> Arg2 =1637emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));1638assert_with_loc(Arg1.first == Arg2.first,1639"Different types in arguments to shuffle!");16401641SetTheory ST;1642SetTheory::RecSet Elts;1643ST.addOperator("lowhalf", std::make_unique<LowHalf>());1644ST.addOperator("highhalf", std::make_unique<HighHalf>());1645ST.addOperator("rev",1646std::make_unique<Rev>(Arg1.first.getElementSizeInBits()));1647ST.addExpander("MaskExpand",1648std::make_unique<MaskExpander>(Arg1.first.getNumElements()));1649ST.evaluate(DI->getArg(2), Elts, std::nullopt);16501651std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;1652for (auto &E : Elts) {1653StringRef Name = E->getName();1654assert_with_loc(Name.starts_with("sv"),1655"Incorrect element kind in shuffle mask!");1656S += ", " + Name.drop_front(2).str();1657}1658S += ")";16591660// Recalculate the return type - the shuffle may have halved or doubled it.1661Type T(Arg1.first);1662if (Elts.size() > T.getNumElements()) {1663assert_with_loc(1664Elts.size() == T.getNumElements() * 2,1665"Can only double or half the number of elements in a shuffle!");1666T.doubleLanes();1667} else if (Elts.size() < T.getNumElements()) {1668assert_with_loc(1669Elts.size() == T.getNumElements() / 2,1670"Can only double or half the number of elements in a shuffle!");1671T.halveLanes();1672}16731674return std::make_pair(T, S);1675}16761677std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {1678assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");1679std::pair<Type, std::string> A =1680emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));1681assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");16821683Type T = Intr.getBaseType();1684assert_with_loc(T.isVector(), "dup() used but default type is scalar!");1685std::string S = "(" + T.str() + ") {";1686for (unsigned I = 0; I < T.getNumElements(); ++I) {1687if (I != 0)1688S += ", ";1689S += A.second;1690}1691S += "}";16921693return std::make_pair(T, S);1694}16951696std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) {1697assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments");1698std::pair<Type, std::string> B =1699emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));1700assert_with_loc(B.first.isScalar(),1701"dup_typed() requires a scalar as the second argument");1702Type T;1703// If the type argument is a constant string, construct the type directly.1704if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) {1705T = Type::fromTypedefName(SI->getAsUnquotedString());1706assert_with_loc(!T.isVoid(), "Unknown typedef");1707} else1708T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first;17091710assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!");1711std::string S = "(" + T.str() + ") {";1712for (unsigned I = 0; I < T.getNumElements(); ++I) {1713if (I != 0)1714S += ", ";1715S += B.second;1716}1717S += "}";17181719return std::make_pair(T, S);1720}17211722std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {1723assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");1724std::pair<Type, std::string> A =1725emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));1726std::pair<Type, std::string> B =1727emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));17281729assert_with_loc(B.first.isScalar(),1730"splat() requires a scalar int as the second argument");17311732std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;1733for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {1734S += ", " + B.second;1735}1736S += ")";17371738return std::make_pair(Intr.getBaseType(), S);1739}17401741std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {1742assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");1743std::pair<Type, std::string> A =1744emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));17451746assert_with_loc(!A.first.isVoid(),1747"Argument to save_temp() must have non-void type!");17481749std::string N = std::string(DI->getArgNameStr(0));1750assert_with_loc(!N.empty(),1751"save_temp() expects a name as the first argument");17521753assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),1754"Variable already defined!");1755Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);17561757std::string S =1758A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;17591760return std::make_pair(Type::getVoid(), S);1761}17621763std::pair<Type, std::string>1764Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {1765std::string S = Intr.Name;17661767assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");1768std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();1769std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();17701771size_t Idx = S.find(ToReplace);17721773assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");1774S.replace(Idx, ToReplace.size(), ReplaceWith);17751776return std::make_pair(Type::getVoid(), S);1777}17781779std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){1780std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();1781std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();1782return std::make_pair(Type::fromTypedefName(Ty), Value);1783}17841785std::pair<Type, std::string>1786Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {1787if (!ArgName.empty()) {1788assert_with_loc(!Arg->isComplete(),1789"Arguments must either be DAGs or names, not both!");1790assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),1791"Variable not defined!");1792Variable &V = Intr.Variables[ArgName];1793return std::make_pair(V.getType(), V.getName());1794}17951796assert(Arg && "Neither ArgName nor Arg?!");1797DagInit *DI = dyn_cast<DagInit>(Arg);1798assert_with_loc(DI, "Arguments must either be DAGs or names!");17991800return emitDag(DI);1801}18021803std::string Intrinsic::generate() {1804// Avoid duplicated code for big and little endian1805if (isBigEndianSafe()) {1806generateImpl(false, "", "");1807return OS.str();1808}1809// Little endian intrinsics are simple and don't require any argument1810// swapping.1811OS << "#ifdef __LITTLE_ENDIAN__\n";18121813generateImpl(false, "", "");18141815OS << "#else\n";18161817// Big endian intrinsics are more complex. The user intended these1818// intrinsics to operate on a vector "as-if" loaded by (V)LDR,1819// but we load as-if (V)LD1. So we should swap all arguments and1820// swap the return value too.1821//1822// If we call sub-intrinsics, we should call a version that does1823// not re-swap the arguments!1824generateImpl(true, "", "__noswap_");18251826// If we're needed early, create a non-swapping variant for1827// big-endian.1828if (NeededEarly) {1829generateImpl(false, "__noswap_", "__noswap_");1830}1831OS << "#endif\n\n";18321833return OS.str();1834}18351836void Intrinsic::generateImpl(bool ReverseArguments,1837StringRef NamePrefix, StringRef CallPrefix) {1838CurrentRecord = R;18391840// If we call a macro, our local variables may be corrupted due to1841// lack of proper lexical scoping. So, add a globally unique postfix1842// to every variable.1843//1844// indexBody() should have set up the Dependencies set by now.1845for (auto *I : Dependencies)1846if (I->UseMacro) {1847VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());1848break;1849}18501851initVariables();18521853emitPrototype(NamePrefix);18541855if (IsUnavailable) {1856OS << " __attribute__((unavailable));";1857} else {1858emitOpeningBrace();1859// Emit return variable declaration first as to not trigger1860// -Wdeclaration-after-statement.1861emitReturnVarDecl();1862emitShadowedArgs();1863if (ReverseArguments)1864emitArgumentReversal();1865emitBody(CallPrefix);1866if (ReverseArguments)1867emitReturnReversal();1868emitReturn();1869emitClosingBrace();1870}1871OS << "\n";18721873CurrentRecord = nullptr;1874}18751876void Intrinsic::indexBody() {1877CurrentRecord = R;18781879initVariables();1880// Emit return variable declaration first as to not trigger1881// -Wdeclaration-after-statement.1882emitReturnVarDecl();1883emitBody("");1884OS.str("");18851886CurrentRecord = nullptr;1887}18881889//===----------------------------------------------------------------------===//1890// NeonEmitter implementation1891//===----------------------------------------------------------------------===//18921893Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,1894std::optional<std::string> MangledName) {1895// First, look up the name in the intrinsic map.1896assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),1897("Intrinsic '" + Name + "' not found!").str());1898auto &V = IntrinsicMap.find(Name.str())->second;1899std::vector<Intrinsic *> GoodVec;19001901// Create a string to print if we end up failing.1902std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";1903for (unsigned I = 0; I < Types.size(); ++I) {1904if (I != 0)1905ErrMsg += ", ";1906ErrMsg += Types[I].str();1907}1908ErrMsg += ")'\n";1909ErrMsg += "Available overloads:\n";19101911// Now, look through each intrinsic implementation and see if the types are1912// compatible.1913for (auto &I : V) {1914ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName();1915ErrMsg += "(";1916for (unsigned A = 0; A < I.getNumParams(); ++A) {1917if (A != 0)1918ErrMsg += ", ";1919ErrMsg += I.getParamType(A).str();1920}1921ErrMsg += ")\n";19221923if (MangledName && MangledName != I.getMangledName(true))1924continue;19251926if (I.getNumParams() != Types.size())1927continue;19281929unsigned ArgNum = 0;1930bool MatchingArgumentTypes = llvm::all_of(Types, [&](const auto &Type) {1931return Type == I.getParamType(ArgNum++);1932});19331934if (MatchingArgumentTypes)1935GoodVec.push_back(&I);1936}19371938assert_with_loc(!GoodVec.empty(),1939"No compatible intrinsic found - " + ErrMsg);1940assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);19411942return *GoodVec.front();1943}19441945void NeonEmitter::createIntrinsic(Record *R,1946SmallVectorImpl<Intrinsic *> &Out) {1947std::string Name = std::string(R->getValueAsString("Name"));1948std::string Proto = std::string(R->getValueAsString("Prototype"));1949std::string Types = std::string(R->getValueAsString("Types"));1950Record *OperationRec = R->getValueAsDef("Operation");1951bool BigEndianSafe = R->getValueAsBit("BigEndianSafe");1952std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));1953std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));1954bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");1955std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));19561957// Set the global current record. This allows assert_with_loc to produce1958// decent location information even when highly nested.1959CurrentRecord = R;19601961ListInit *Body = OperationRec->getValueAsListInit("Ops");19621963std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);19641965ClassKind CK = ClassNone;1966if (R->getSuperClasses().size() >= 2)1967CK = ClassMap[R->getSuperClasses()[1].first];19681969std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;1970if (!CartesianProductWith.empty()) {1971std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith);1972for (auto TS : TypeSpecs) {1973Type DefaultT(TS, ".");1974for (auto SrcTS : ProductTypeSpecs) {1975Type DefaultSrcT(SrcTS, ".");1976if (TS == SrcTS ||1977DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())1978continue;1979NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));1980}1981}1982} else {1983for (auto TS : TypeSpecs) {1984NewTypeSpecs.push_back(std::make_pair(TS, TS));1985}1986}19871988llvm::sort(NewTypeSpecs);1989NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),1990NewTypeSpecs.end());1991auto &Entry = IntrinsicMap[Name];19921993for (auto &I : NewTypeSpecs) {1994Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,1995ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);1996Out.push_back(&Entry.back());1997}19981999CurrentRecord = nullptr;2000}20012002/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def2003/// declaration of builtins, checking for unique builtin declarations.2004void NeonEmitter::genBuiltinsDef(raw_ostream &OS,2005SmallVectorImpl<Intrinsic *> &Defs) {2006OS << "#ifdef GET_NEON_BUILTINS\n";20072008// We only want to emit a builtin once, and we want to emit them in2009// alphabetical order, so use a std::set.2010std::set<std::pair<std::string, std::string>> Builtins;20112012for (auto *Def : Defs) {2013if (Def->hasBody())2014continue;20152016std::string S = "__builtin_neon_" + Def->getMangledName() + ", \"";2017S += Def->getBuiltinTypeStr();2018S += "\", \"n\"";20192020Builtins.emplace(S, Def->getTargetGuard());2021}20222023for (auto &S : Builtins) {2024if (S.second == "")2025OS << "BUILTIN(";2026else2027OS << "TARGET_BUILTIN(";2028OS << S.first;2029if (S.second == "")2030OS << ")\n";2031else2032OS << ", \"" << S.second << "\")\n";2033}20342035OS << "#endif\n\n";2036}20372038void NeonEmitter::genStreamingSVECompatibleList(2039raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {2040OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n";20412042std::set<std::string> Emitted;2043for (auto *Def : Defs) {2044// If the def has a body (that is, it has Operation DAGs), it won't call2045// __builtin_neon_* so we don't need to generate a definition for it.2046if (Def->hasBody())2047continue;20482049std::string Name = Def->getMangledName();2050if (Emitted.find(Name) != Emitted.end())2051continue;20522053// FIXME: We should make exceptions here for some NEON builtins that are2054// permitted in streaming mode.2055OS << "case NEON::BI__builtin_neon_" << Name2056<< ": BuiltinType = ArmNonStreaming; break;\n";2057Emitted.insert(Name);2058}2059OS << "#endif\n\n";2060}20612062/// Generate the ARM and AArch64 overloaded type checking code for2063/// SemaChecking.cpp, checking for unique builtin declarations.2064void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,2065SmallVectorImpl<Intrinsic *> &Defs) {2066OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";20672068// We record each overload check line before emitting because subsequent Inst2069// definitions may extend the number of permitted types (i.e. augment the2070// Mask). Use std::map to avoid sorting the table by hash number.2071struct OverloadInfo {2072uint64_t Mask = 0ULL;2073int PtrArgNum = 0;2074bool HasConstPtr = false;2075OverloadInfo() = default;2076};2077std::map<std::string, OverloadInfo> OverloadMap;20782079for (auto *Def : Defs) {2080// If the def has a body (that is, it has Operation DAGs), it won't call2081// __builtin_neon_* so we don't need to generate a definition for it.2082if (Def->hasBody())2083continue;2084// Functions which have a scalar argument cannot be overloaded, no need to2085// check them if we are emitting the type checking code.2086if (Def->protoHasScalar())2087continue;20882089uint64_t Mask = 0ULL;2090Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();20912092// Check if the function has a pointer or const pointer argument.2093int PtrArgNum = -1;2094bool HasConstPtr = false;2095for (unsigned I = 0; I < Def->getNumParams(); ++I) {2096const auto &Type = Def->getParamType(I);2097if (Type.isPointer()) {2098PtrArgNum = I;2099HasConstPtr = Type.isConstPointer();2100}2101}21022103// For sret builtins, adjust the pointer argument index.2104if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)2105PtrArgNum += 1;21062107std::string Name = Def->getName();2108// Omit type checking for the pointer arguments of vld1_lane, vld1_dup,2109// vst1_lane, vldap1_lane, and vstl1_lane intrinsics. Using a pointer to2110// the vector element type with one of those operations causes codegen to2111// select an aligned load/store instruction. If you want an unaligned2112// operation, the pointer argument needs to have less alignment than element2113// type, so just accept any pointer type.2114if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" ||2115Name == "vldap1_lane" || Name == "vstl1_lane") {2116PtrArgNum = -1;2117HasConstPtr = false;2118}21192120if (Mask) {2121std::string Name = Def->getMangledName();2122OverloadMap.insert(std::make_pair(Name, OverloadInfo()));2123OverloadInfo &OI = OverloadMap[Name];2124OI.Mask |= Mask;2125OI.PtrArgNum |= PtrArgNum;2126OI.HasConstPtr = HasConstPtr;2127}2128}21292130for (auto &I : OverloadMap) {2131OverloadInfo &OI = I.second;21322133OS << "case NEON::BI__builtin_neon_" << I.first << ": ";2134OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";2135if (OI.PtrArgNum >= 0)2136OS << "; PtrArgNum = " << OI.PtrArgNum;2137if (OI.HasConstPtr)2138OS << "; HasConstPtr = true";2139OS << "; break;\n";2140}2141OS << "#endif\n\n";2142}21432144void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,2145SmallVectorImpl<Intrinsic *> &Defs) {2146OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";21472148std::set<std::string> Emitted;21492150for (auto *Def : Defs) {2151if (Def->hasBody())2152continue;2153// Functions which do not have an immediate do not need to have range2154// checking code emitted.2155if (!Def->hasImmediate())2156continue;2157if (Emitted.find(Def->getMangledName()) != Emitted.end())2158continue;21592160std::string LowerBound, UpperBound;21612162Record *R = Def->getRecord();2163if (R->getValueAsBit("isVXAR")) {2164//VXAR takes an immediate in the range [0, 63]2165LowerBound = "0";2166UpperBound = "63";2167} else if (R->getValueAsBit("isVCVT_N")) {2168// VCVT between floating- and fixed-point values takes an immediate2169// in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.2170LowerBound = "1";2171if (Def->getBaseType().getElementSizeInBits() == 16 ||2172Def->getName().find('h') != std::string::npos)2173// VCVTh operating on FP16 intrinsics in range [1, 16)2174UpperBound = "15";2175else if (Def->getBaseType().getElementSizeInBits() == 32)2176UpperBound = "31";2177else2178UpperBound = "63";2179} else if (R->getValueAsBit("isScalarShift")) {2180// Right shifts have an 'r' in the name, left shifts do not. Convert2181// instructions have the same bounds and right shifts.2182if (Def->getName().find('r') != std::string::npos ||2183Def->getName().find("cvt") != std::string::npos)2184LowerBound = "1";21852186UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);2187} else if (R->getValueAsBit("isShift")) {2188// Builtins which are overloaded by type will need to have their upper2189// bound computed at Sema time based on the type constant.21902191// Right shifts have an 'r' in the name, left shifts do not.2192if (Def->getName().find('r') != std::string::npos)2193LowerBound = "1";2194UpperBound = "RFT(TV, true)";2195} else if (Def->getClassKind(true) == ClassB) {2196// ClassB intrinsics have a type (and hence lane number) that is only2197// known at runtime.2198if (R->getValueAsBit("isLaneQ"))2199UpperBound = "RFT(TV, false, true)";2200else2201UpperBound = "RFT(TV, false, false)";2202} else {2203// The immediate generally refers to a lane in the preceding argument.2204assert(Def->getImmediateIdx() > 0);2205Type T = Def->getParamType(Def->getImmediateIdx() - 1);2206UpperBound = utostr(T.getNumElements() - 1);2207}22082209// Calculate the index of the immediate that should be range checked.2210unsigned Idx = Def->getNumParams();2211if (Def->hasImmediate())2212Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());22132214OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "2215<< "i = " << Idx << ";";2216if (!LowerBound.empty())2217OS << " l = " << LowerBound << ";";2218if (!UpperBound.empty())2219OS << " u = " << UpperBound << ";";2220OS << " break;\n";22212222Emitted.insert(Def->getMangledName());2223}22242225OS << "#endif\n\n";2226}22272228/// runHeader - Emit a file with sections defining:2229/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.2230/// 2. the SemaChecking code for the type overload checking.2231/// 3. the SemaChecking code for validation of intrinsic immediate arguments.2232void NeonEmitter::runHeader(raw_ostream &OS) {2233std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");22342235SmallVector<Intrinsic *, 128> Defs;2236for (auto *R : RV)2237createIntrinsic(R, Defs);22382239// Generate shared BuiltinsXXX.def2240genBuiltinsDef(OS, Defs);22412242// Generate ARM overloaded type checking code for SemaChecking.cpp2243genOverloadTypeCheckCode(OS, Defs);22442245genStreamingSVECompatibleList(OS, Defs);22462247// Generate ARM range checking code for shift/lane immediates.2248genIntrinsicRangeCheckCode(OS, Defs);2249}22502251static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {2252std::string TypedefTypes(types);2253std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);22542255// Emit vector typedefs.2256bool InIfdef = false;2257for (auto &TS : TDTypeVec) {2258bool IsA64 = false;2259Type T(TS, ".");2260if (T.isDouble())2261IsA64 = true;22622263if (InIfdef && !IsA64) {2264OS << "#endif\n";2265InIfdef = false;2266}2267if (!InIfdef && IsA64) {2268OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";2269InIfdef = true;2270}22712272if (T.isPoly())2273OS << "typedef __attribute__((neon_polyvector_type(";2274else2275OS << "typedef __attribute__((neon_vector_type(";22762277Type T2 = T;2278T2.makeScalar();2279OS << T.getNumElements() << "))) ";2280OS << T2.str();2281OS << " " << T.str() << ";\n";2282}2283if (InIfdef)2284OS << "#endif\n";2285OS << "\n";22862287// Emit struct typedefs.2288InIfdef = false;2289for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {2290for (auto &TS : TDTypeVec) {2291bool IsA64 = false;2292Type T(TS, ".");2293if (T.isDouble())2294IsA64 = true;22952296if (InIfdef && !IsA64) {2297OS << "#endif\n";2298InIfdef = false;2299}2300if (!InIfdef && IsA64) {2301OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";2302InIfdef = true;2303}23042305const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};2306Type VT(TS, Mods);2307OS << "typedef struct " << VT.str() << " {\n";2308OS << " " << T.str() << " val";2309OS << "[" << NumMembers << "]";2310OS << ";\n} ";2311OS << VT.str() << ";\n";2312OS << "\n";2313}2314}2315if (InIfdef)2316OS << "#endif\n";2317}23182319/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h2320/// is comprised of type definitions and function declarations.2321void NeonEmitter::run(raw_ostream &OS) {2322OS << "/*===---- arm_neon.h - ARM Neon intrinsics "2323"------------------------------"2324"---===\n"2325" *\n"2326" * Permission is hereby granted, free of charge, to any person "2327"obtaining "2328"a copy\n"2329" * of this software and associated documentation files (the "2330"\"Software\"),"2331" to deal\n"2332" * in the Software without restriction, including without limitation "2333"the "2334"rights\n"2335" * to use, copy, modify, merge, publish, distribute, sublicense, "2336"and/or sell\n"2337" * copies of the Software, and to permit persons to whom the Software "2338"is\n"2339" * furnished to do so, subject to the following conditions:\n"2340" *\n"2341" * The above copyright notice and this permission notice shall be "2342"included in\n"2343" * all copies or substantial portions of the Software.\n"2344" *\n"2345" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "2346"EXPRESS OR\n"2347" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "2348"MERCHANTABILITY,\n"2349" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "2350"SHALL THE\n"2351" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "2352"OTHER\n"2353" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "2354"ARISING FROM,\n"2355" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "2356"DEALINGS IN\n"2357" * THE SOFTWARE.\n"2358" *\n"2359" *===-----------------------------------------------------------------"2360"---"2361"---===\n"2362" */\n\n";23632364OS << "#ifndef __ARM_NEON_H\n";2365OS << "#define __ARM_NEON_H\n\n";23662367OS << "#ifndef __ARM_FP\n";2368OS << "#error \"NEON intrinsics not available with the soft-float ABI. "2369"Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";2370OS << "#else\n\n";23712372OS << "#include <stdint.h>\n\n";23732374OS << "#include <arm_bf16.h>\n";23752376OS << "#include <arm_vector_types.h>\n";23772378// For now, signedness of polynomial types depends on target2379OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";2380OS << "typedef uint8_t poly8_t;\n";2381OS << "typedef uint16_t poly16_t;\n";2382OS << "typedef uint64_t poly64_t;\n";2383OS << "typedef __uint128_t poly128_t;\n";2384OS << "#else\n";2385OS << "typedef int8_t poly8_t;\n";2386OS << "typedef int16_t poly16_t;\n";2387OS << "typedef int64_t poly64_t;\n";2388OS << "#endif\n";2389emitNeonTypeDefs("PcQPcPsQPsPlQPl", OS);23902391OS << "#define __ai static __inline__ __attribute__((__always_inline__, "2392"__nodebug__))\n\n";23932394SmallVector<Intrinsic *, 128> Defs;2395std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");2396for (auto *R : RV)2397createIntrinsic(R, Defs);23982399for (auto *I : Defs)2400I->indexBody();24012402llvm::stable_sort(Defs, llvm::deref<std::less<>>());24032404// Only emit a def when its requirements have been met.2405// FIXME: This loop could be made faster, but it's fast enough for now.2406bool MadeProgress = true;2407std::string InGuard;2408while (!Defs.empty() && MadeProgress) {2409MadeProgress = false;24102411for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();2412I != Defs.end(); /*No step*/) {2413bool DependenciesSatisfied = true;2414for (auto *II : (*I)->getDependencies()) {2415if (llvm::is_contained(Defs, II))2416DependenciesSatisfied = false;2417}2418if (!DependenciesSatisfied) {2419// Try the next one.2420++I;2421continue;2422}24232424// Emit #endif/#if pair if needed.2425if ((*I)->getArchGuard() != InGuard) {2426if (!InGuard.empty())2427OS << "#endif\n";2428InGuard = (*I)->getArchGuard();2429if (!InGuard.empty())2430OS << "#if " << InGuard << "\n";2431}24322433// Actually generate the intrinsic code.2434OS << (*I)->generate();24352436MadeProgress = true;2437I = Defs.erase(I);2438}2439}2440assert(Defs.empty() && "Some requirements were not satisfied!");2441if (!InGuard.empty())2442OS << "#endif\n";24432444OS << "\n";2445OS << "#undef __ai\n\n";2446OS << "#endif /* if !defined(__ARM_NEON) */\n";2447OS << "#endif /* ifndef __ARM_FP */\n";2448}24492450/// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h2451/// is comprised of type definitions and function declarations.2452void NeonEmitter::runFP16(raw_ostream &OS) {2453OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "2454"------------------------------"2455"---===\n"2456" *\n"2457" * Permission is hereby granted, free of charge, to any person "2458"obtaining a copy\n"2459" * of this software and associated documentation files (the "2460"\"Software\"), to deal\n"2461" * in the Software without restriction, including without limitation "2462"the rights\n"2463" * to use, copy, modify, merge, publish, distribute, sublicense, "2464"and/or sell\n"2465" * copies of the Software, and to permit persons to whom the Software "2466"is\n"2467" * furnished to do so, subject to the following conditions:\n"2468" *\n"2469" * The above copyright notice and this permission notice shall be "2470"included in\n"2471" * all copies or substantial portions of the Software.\n"2472" *\n"2473" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "2474"EXPRESS OR\n"2475" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "2476"MERCHANTABILITY,\n"2477" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "2478"SHALL THE\n"2479" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "2480"OTHER\n"2481" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "2482"ARISING FROM,\n"2483" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "2484"DEALINGS IN\n"2485" * THE SOFTWARE.\n"2486" *\n"2487" *===-----------------------------------------------------------------"2488"---"2489"---===\n"2490" */\n\n";24912492OS << "#ifndef __ARM_FP16_H\n";2493OS << "#define __ARM_FP16_H\n\n";24942495OS << "#include <stdint.h>\n\n";24962497OS << "typedef __fp16 float16_t;\n";24982499OS << "#define __ai static __inline__ __attribute__((__always_inline__, "2500"__nodebug__))\n\n";25012502SmallVector<Intrinsic *, 128> Defs;2503std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");2504for (auto *R : RV)2505createIntrinsic(R, Defs);25062507for (auto *I : Defs)2508I->indexBody();25092510llvm::stable_sort(Defs, llvm::deref<std::less<>>());25112512// Only emit a def when its requirements have been met.2513// FIXME: This loop could be made faster, but it's fast enough for now.2514bool MadeProgress = true;2515std::string InGuard;2516while (!Defs.empty() && MadeProgress) {2517MadeProgress = false;25182519for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();2520I != Defs.end(); /*No step*/) {2521bool DependenciesSatisfied = true;2522for (auto *II : (*I)->getDependencies()) {2523if (llvm::is_contained(Defs, II))2524DependenciesSatisfied = false;2525}2526if (!DependenciesSatisfied) {2527// Try the next one.2528++I;2529continue;2530}25312532// Emit #endif/#if pair if needed.2533if ((*I)->getArchGuard() != InGuard) {2534if (!InGuard.empty())2535OS << "#endif\n";2536InGuard = (*I)->getArchGuard();2537if (!InGuard.empty())2538OS << "#if " << InGuard << "\n";2539}25402541// Actually generate the intrinsic code.2542OS << (*I)->generate();25432544MadeProgress = true;2545I = Defs.erase(I);2546}2547}2548assert(Defs.empty() && "Some requirements were not satisfied!");2549if (!InGuard.empty())2550OS << "#endif\n";25512552OS << "\n";2553OS << "#undef __ai\n\n";2554OS << "#endif /* __ARM_FP16_H */\n";2555}25562557void NeonEmitter::runVectorTypes(raw_ostream &OS) {2558OS << "/*===---- arm_vector_types - ARM vector type "2559"------===\n"2560" *\n"2561" *\n"2562" * Part of the LLVM Project, under the Apache License v2.0 with LLVM "2563"Exceptions.\n"2564" * See https://llvm.org/LICENSE.txt for license information.\n"2565" * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"2566" *\n"2567" *===-----------------------------------------------------------------"2568"------===\n"2569" */\n\n";2570OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n";2571OS << "#error \"This file should not be used standalone. Please include"2572" arm_neon.h or arm_sve.h instead\"\n\n";2573OS << "#endif\n";2574OS << "#ifndef __ARM_NEON_TYPES_H\n";2575OS << "#define __ARM_NEON_TYPES_H\n";2576OS << "typedef float float32_t;\n";2577OS << "typedef __fp16 float16_t;\n";25782579OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";2580OS << "typedef double float64_t;\n";2581OS << "#endif\n\n";25822583emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQd", OS);25842585emitNeonTypeDefs("bQb", OS);2586OS << "#endif // __ARM_NEON_TYPES_H\n";2587}25882589void NeonEmitter::runBF16(raw_ostream &OS) {2590OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "2591"-----------------------------------===\n"2592" *\n"2593" *\n"2594" * Part of the LLVM Project, under the Apache License v2.0 with LLVM "2595"Exceptions.\n"2596" * See https://llvm.org/LICENSE.txt for license information.\n"2597" * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"2598" *\n"2599" *===-----------------------------------------------------------------"2600"------===\n"2601" */\n\n";26022603OS << "#ifndef __ARM_BF16_H\n";2604OS << "#define __ARM_BF16_H\n\n";26052606OS << "typedef __bf16 bfloat16_t;\n";26072608OS << "#define __ai static __inline__ __attribute__((__always_inline__, "2609"__nodebug__))\n\n";26102611SmallVector<Intrinsic *, 128> Defs;2612std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");2613for (auto *R : RV)2614createIntrinsic(R, Defs);26152616for (auto *I : Defs)2617I->indexBody();26182619llvm::stable_sort(Defs, llvm::deref<std::less<>>());26202621// Only emit a def when its requirements have been met.2622// FIXME: This loop could be made faster, but it's fast enough for now.2623bool MadeProgress = true;2624std::string InGuard;2625while (!Defs.empty() && MadeProgress) {2626MadeProgress = false;26272628for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();2629I != Defs.end(); /*No step*/) {2630bool DependenciesSatisfied = true;2631for (auto *II : (*I)->getDependencies()) {2632if (llvm::is_contained(Defs, II))2633DependenciesSatisfied = false;2634}2635if (!DependenciesSatisfied) {2636// Try the next one.2637++I;2638continue;2639}26402641// Emit #endif/#if pair if needed.2642if ((*I)->getArchGuard() != InGuard) {2643if (!InGuard.empty())2644OS << "#endif\n";2645InGuard = (*I)->getArchGuard();2646if (!InGuard.empty())2647OS << "#if " << InGuard << "\n";2648}26492650// Actually generate the intrinsic code.2651OS << (*I)->generate();26522653MadeProgress = true;2654I = Defs.erase(I);2655}2656}2657assert(Defs.empty() && "Some requirements were not satisfied!");2658if (!InGuard.empty())2659OS << "#endif\n";26602661OS << "\n";2662OS << "#undef __ai\n\n";26632664OS << "#endif\n";2665}26662667void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) {2668NeonEmitter(Records).run(OS);2669}26702671void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) {2672NeonEmitter(Records).runFP16(OS);2673}26742675void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) {2676NeonEmitter(Records).runBF16(OS);2677}26782679void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {2680NeonEmitter(Records).runHeader(OS);2681}26822683void clang::EmitVectorTypes(RecordKeeper &Records, raw_ostream &OS) {2684NeonEmitter(Records).runVectorTypes(OS);2685}26862687void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {2688llvm_unreachable("Neon test generation no longer implemented!");2689}269026912692