Path: blob/main/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
35234 views
//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// Implement the Lexer for .ll files.9//10//===----------------------------------------------------------------------===//1112#include "llvm/AsmParser/LLLexer.h"13#include "llvm/ADT/APInt.h"14#include "llvm/ADT/STLExtras.h"15#include "llvm/ADT/StringExtras.h"16#include "llvm/ADT/Twine.h"17#include "llvm/IR/DerivedTypes.h"18#include "llvm/IR/Instruction.h"19#include "llvm/Support/ErrorHandling.h"20#include "llvm/Support/SourceMgr.h"21#include <cassert>22#include <cctype>23#include <cstdio>2425using namespace llvm;2627bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {28ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);29return true;30}3132void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {33SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);34}3536//===----------------------------------------------------------------------===//37// Helper functions.38//===----------------------------------------------------------------------===//3940// atoull - Convert an ascii string of decimal digits into the unsigned long41// long representation... 
this does not have to do input error checking,42// because we know that the input will be matched by a suitable regex...43//44uint64_t LLLexer::atoull(const char *Buffer, const char *End) {45uint64_t Result = 0;46for (; Buffer != End; Buffer++) {47uint64_t OldRes = Result;48Result *= 10;49Result += *Buffer-'0';50if (Result < OldRes) { // Uh, oh, overflow detected!!!51Error("constant bigger than 64 bits detected!");52return 0;53}54}55return Result;56}5758uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {59uint64_t Result = 0;60for (; Buffer != End; ++Buffer) {61uint64_t OldRes = Result;62Result *= 16;63Result += hexDigitValue(*Buffer);6465if (Result < OldRes) { // Uh, oh, overflow detected!!!66Error("constant bigger than 64 bits detected!");67return 0;68}69}70return Result;71}7273void LLLexer::HexToIntPair(const char *Buffer, const char *End,74uint64_t Pair[2]) {75Pair[0] = 0;76if (End - Buffer >= 16) {77for (int i = 0; i < 16; i++, Buffer++) {78assert(Buffer != End);79Pair[0] *= 16;80Pair[0] += hexDigitValue(*Buffer);81}82}83Pair[1] = 0;84for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {85Pair[1] *= 16;86Pair[1] += hexDigitValue(*Buffer);87}88if (Buffer != End)89Error("constant bigger than 128 bits detected!");90}9192/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into93/// { low64, high16 } as usual for an APInt.94void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,95uint64_t Pair[2]) {96Pair[1] = 0;97for (int i=0; i<4 && Buffer != End; i++, Buffer++) {98assert(Buffer != End);99Pair[1] *= 16;100Pair[1] += hexDigitValue(*Buffer);101}102Pair[0] = 0;103for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {104Pair[0] *= 16;105Pair[0] += hexDigitValue(*Buffer);106}107if (Buffer != End)108Error("constant bigger than 128 bits detected!");109}110111// UnEscapeLexed - Run through the specified buffer and change \xx codes to the112// appropriate character.113static void UnEscapeLexed(std::string &Str) {114if 
(Str.empty()) return;115116char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();117char *BOut = Buffer;118for (char *BIn = Buffer; BIn != EndBuffer; ) {119if (BIn[0] == '\\') {120if (BIn < EndBuffer-1 && BIn[1] == '\\') {121*BOut++ = '\\'; // Two \ becomes one122BIn += 2;123} else if (BIn < EndBuffer-2 &&124isxdigit(static_cast<unsigned char>(BIn[1])) &&125isxdigit(static_cast<unsigned char>(BIn[2]))) {126*BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);127BIn += 3; // Skip over handled chars128++BOut;129} else {130*BOut++ = *BIn++;131}132} else {133*BOut++ = *BIn++;134}135}136Str.resize(BOut-Buffer);137}138139/// isLabelChar - Return true for [-a-zA-Z$._0-9].140static bool isLabelChar(char C) {141return isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' ||142C == '.' || C == '_';143}144145/// isLabelTail - Return true if this pointer points to a valid end of a label.146static const char *isLabelTail(const char *CurPtr) {147while (true) {148if (CurPtr[0] == ':') return CurPtr+1;149if (!isLabelChar(CurPtr[0])) return nullptr;150++CurPtr;151}152}153154//===----------------------------------------------------------------------===//155// Lexer definition.156//===----------------------------------------------------------------------===//157158LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,159LLVMContext &C)160: CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C) {161CurPtr = CurBuf.begin();162}163164int LLLexer::getNextChar() {165char CurChar = *CurPtr++;166switch (CurChar) {167default: return (unsigned char)CurChar;168case 0:169// A nul character in the stream is either the end of the current buffer or170// a random nul in the file. 
Disambiguate that here.171if (CurPtr-1 != CurBuf.end())172return 0; // Just whitespace.173174// Otherwise, return end of file.175--CurPtr; // Another call to lex will return EOF again.176return EOF;177}178}179180lltok::Kind LLLexer::LexToken() {181while (true) {182TokStart = CurPtr;183184int CurChar = getNextChar();185switch (CurChar) {186default:187// Handle letters: [a-zA-Z_]188if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')189return LexIdentifier();190191return lltok::Error;192case EOF: return lltok::Eof;193case 0:194case ' ':195case '\t':196case '\n':197case '\r':198// Ignore whitespace.199continue;200case '+': return LexPositive();201case '@': return LexAt();202case '$': return LexDollar();203case '%': return LexPercent();204case '"': return LexQuote();205case '.':206if (const char *Ptr = isLabelTail(CurPtr)) {207CurPtr = Ptr;208StrVal.assign(TokStart, CurPtr-1);209return lltok::LabelStr;210}211if (CurPtr[0] == '.' && CurPtr[1] == '.') {212CurPtr += 2;213return lltok::dotdotdot;214}215return lltok::Error;216case ';':217SkipLineComment();218continue;219case '!': return LexExclaim();220case '^':221return LexCaret();222case ':':223return lltok::colon;224case '#': return LexHash();225case '0': case '1': case '2': case '3': case '4':226case '5': case '6': case '7': case '8': case '9':227case '-':228return LexDigitOrNegative();229case '=': return lltok::equal;230case '[': return lltok::lsquare;231case ']': return lltok::rsquare;232case '{': return lltok::lbrace;233case '}': return lltok::rbrace;234case '<': return lltok::less;235case '>': return lltok::greater;236case '(': return lltok::lparen;237case ')': return lltok::rparen;238case ',': return lltok::comma;239case '*': return lltok::star;240case '|': return lltok::bar;241}242}243}244245void LLLexer::SkipLineComment() {246while (true) {247if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)248return;249}250}251252/// Lex all tokens that start with an @ character.253/// GlobalVar 
@\"[^\"]*\"254/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*255/// GlobalVarID @[0-9]+256lltok::Kind LLLexer::LexAt() {257return LexVar(lltok::GlobalVar, lltok::GlobalID);258}259260lltok::Kind LLLexer::LexDollar() {261if (const char *Ptr = isLabelTail(TokStart)) {262CurPtr = Ptr;263StrVal.assign(TokStart, CurPtr - 1);264return lltok::LabelStr;265}266267// Handle DollarStringConstant: $\"[^\"]*\"268if (CurPtr[0] == '"') {269++CurPtr;270271while (true) {272int CurChar = getNextChar();273274if (CurChar == EOF) {275Error("end of file in COMDAT variable name");276return lltok::Error;277}278if (CurChar == '"') {279StrVal.assign(TokStart + 2, CurPtr - 1);280UnEscapeLexed(StrVal);281if (StringRef(StrVal).contains(0)) {282Error("Null bytes are not allowed in names");283return lltok::Error;284}285return lltok::ComdatVar;286}287}288}289290// Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*291if (ReadVarName())292return lltok::ComdatVar;293294return lltok::Error;295}296297/// ReadString - Read a string until the closing quote.298lltok::Kind LLLexer::ReadString(lltok::Kind kind) {299const char *Start = CurPtr;300while (true) {301int CurChar = getNextChar();302303if (CurChar == EOF) {304Error("end of file in string constant");305return lltok::Error;306}307if (CurChar == '"') {308StrVal.assign(Start, CurPtr-1);309UnEscapeLexed(StrVal);310return kind;311}312}313}314315/// ReadVarName - Read the rest of a token containing a variable name.316bool LLLexer::ReadVarName() {317const char *NameStart = CurPtr;318if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||319CurPtr[0] == '-' || CurPtr[0] == '$' ||320CurPtr[0] == '.' || CurPtr[0] == '_') {321++CurPtr;322while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||323CurPtr[0] == '-' || CurPtr[0] == '$' ||324CurPtr[0] == '.' || CurPtr[0] == '_')325++CurPtr;326327StrVal.assign(NameStart, CurPtr);328return true;329}330return false;331}332333// Lex an ID: [0-9]+. 
On success, the ID is stored in UIntVal and Token is334// returned, otherwise the Error token is returned.335lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {336if (!isdigit(static_cast<unsigned char>(CurPtr[0])))337return lltok::Error;338339for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)340/*empty*/;341342uint64_t Val = atoull(TokStart + 1, CurPtr);343if ((unsigned)Val != Val)344Error("invalid value number (too large)!");345UIntVal = unsigned(Val);346return Token;347}348349lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {350// Handle StringConstant: \"[^\"]*\"351if (CurPtr[0] == '"') {352++CurPtr;353354while (true) {355int CurChar = getNextChar();356357if (CurChar == EOF) {358Error("end of file in global variable name");359return lltok::Error;360}361if (CurChar == '"') {362StrVal.assign(TokStart+2, CurPtr-1);363UnEscapeLexed(StrVal);364if (StringRef(StrVal).contains(0)) {365Error("Null bytes are not allowed in names");366return lltok::Error;367}368return Var;369}370}371}372373// Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*374if (ReadVarName())375return Var;376377// Handle VarID: [0-9]+378return LexUIntID(VarID);379}380381/// Lex all tokens that start with a % character.382/// LocalVar ::= %\"[^\"]*\"383/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*384/// LocalVarID ::= %[0-9]+385lltok::Kind LLLexer::LexPercent() {386return LexVar(lltok::LocalVar, lltok::LocalVarID);387}388389/// Lex all tokens that start with a " character.390/// QuoteLabel "[^"]+":391/// StringConstant "[^"]*"392lltok::Kind LLLexer::LexQuote() {393lltok::Kind kind = ReadString(lltok::StringConstant);394if (kind == lltok::Error || kind == lltok::Eof)395return kind;396397if (CurPtr[0] == ':') {398++CurPtr;399if (StringRef(StrVal).contains(0)) {400Error("Null bytes are not allowed in names");401kind = lltok::Error;402} else {403kind = lltok::LabelStr;404}405}406407return kind;408}409410/// Lex all tokens that start with a ! 
/// character.
///    !foo
///    !
/// A metadata name (with \xx escapes resolved) is stored in StrVal and
/// returned as MetadataVar; a lone '!' is returned as the exclaim token.
lltok::Kind LLLexer::LexExclaim() {
  // Lex a metadata name as a MetadataVar.
  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
      CurPtr[0] == '-' || CurPtr[0] == '$' ||
      CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
    ++CurPtr;
    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
           CurPtr[0] == '-' || CurPtr[0] == '$' ||
           CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
      ++CurPtr;

    StrVal.assign(TokStart+1, CurPtr);   // Skip !
    UnEscapeLexed(StrVal);
    return lltok::MetadataVar;
  }
  return lltok::exclaim;
}

/// Lex all tokens that start with a ^ character.
///    SummaryID ::= ^[0-9]+
lltok::Kind LLLexer::LexCaret() {
  // Handle SummaryID: ^[0-9]+
  return LexUIntID(lltok::SummaryID);
}

/// Lex all tokens that start with a # character.
///    AttrGrpID ::= #[0-9]+
///    Hash ::= #
lltok::Kind LLLexer::LexHash() {
  // Handle AttrGrpID: #[0-9]+
  if (isdigit(static_cast<unsigned char>(CurPtr[0])))
    return LexUIntID(lltok::AttrGrpID);
  return lltok::hash;
}

/// Lex a label, integer type, keyword, or hexadecimal integer constant.
///    Label           [-a-zA-Z$._0-9]+:
///    IntegerType     i[0-9]+
///    Keyword         sdiv, float, ...
///    HexIntConstant  [us]0x[0-9A-Fa-f]+
///
/// On entry the first character of the token has already been consumed
/// (CurPtr[-1] is that character). If nothing matches, CurPtr is reset to just
/// past that first character and the Error token is returned.
lltok::Kind LLLexer::LexIdentifier() {
  const char *StartChar = CurPtr;
  // IntEnd stays null only while the token can still be an integer type, i.e.
  // it started with 'i' and only digits have been seen since.
  const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
  // KeywordEnd marks the first character that cannot be part of a keyword
  // (anything other than [a-zA-Z0-9_]).
  const char *KeywordEnd = nullptr;

  for (; isLabelChar(*CurPtr); ++CurPtr) {
    // If we decide this is an integer, remember the end of the sequence.
    if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
      IntEnd = CurPtr;
    if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
        *CurPtr != '_')
      KeywordEnd = CurPtr;
  }

  // If we stopped due to a colon, unless we were directed to ignore it,
  // this really is a label.
  if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
    StrVal.assign(StartChar-1, CurPtr++);
    return lltok::LabelStr;
  }

  // Otherwise, this wasn't a label.  If this was valid as an integer type,
  // return it.
  if (!IntEnd) IntEnd = CurPtr;
  if (IntEnd != StartChar) {
    CurPtr = IntEnd;
    uint64_t NumBits = atoull(StartChar, CurPtr);
    if (NumBits < IntegerType::MIN_INT_BITS ||
        NumBits > IntegerType::MAX_INT_BITS) {
      Error("bitwidth for integer type out of range!");
      return lltok::Error;
    }
    TyVal = IntegerType::get(Context, NumBits);
    return lltok::Type;
  }

  // Otherwise, this was a letter sequence.  See which keyword this is.
  if (!KeywordEnd) KeywordEnd = CurPtr;
  CurPtr = KeywordEnd;
  --StartChar; // Include the first character, which was consumed on entry.
  StringRef Keyword(StartChar, CurPtr - StartChar);

// Return the token lltok::kw_<STR> if the lexed text is exactly STR.
#define KEYWORD(STR)                                                           \
  do {                                                                         \
    if (Keyword == #STR)                                                       \
      return lltok::kw_##STR;                                                  \
  } while (false)

  KEYWORD(true);    KEYWORD(false);
  KEYWORD(declare); KEYWORD(define);
  KEYWORD(global);  KEYWORD(constant);

  KEYWORD(dso_local);
  KEYWORD(dso_preemptable);

  KEYWORD(private);
  KEYWORD(internal);
  KEYWORD(available_externally);
  KEYWORD(linkonce);
  KEYWORD(linkonce_odr);
  KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
  KEYWORD(weak_odr);
  KEYWORD(appending);
  KEYWORD(dllimport);
  KEYWORD(dllexport);
  KEYWORD(common);
  KEYWORD(default);
  KEYWORD(hidden);
  KEYWORD(protected);
  KEYWORD(unnamed_addr);
  KEYWORD(local_unnamed_addr);
  KEYWORD(externally_initialized);
  KEYWORD(extern_weak);
  KEYWORD(external);
  KEYWORD(thread_local);
  KEYWORD(localdynamic);
  KEYWORD(initialexec);
  KEYWORD(localexec);
  KEYWORD(zeroinitializer);
  KEYWORD(undef);
  KEYWORD(null);
  KEYWORD(none);
  KEYWORD(poison);
  KEYWORD(to);
  KEYWORD(caller);
  KEYWORD(within);
  KEYWORD(from);
  KEYWORD(tail);
  KEYWORD(musttail);
  KEYWORD(notail);
  KEYWORD(target);
  KEYWORD(triple);
  KEYWORD(source_filename);
  KEYWORD(unwind);
  KEYWORD(datalayout);
  KEYWORD(volatile);
  KEYWORD(atomic);
  KEYWORD(unordered);
  KEYWORD(monotonic);
  KEYWORD(acquire);
  KEYWORD(release);
  KEYWORD(acq_rel);
  KEYWORD(seq_cst);
  KEYWORD(syncscope);

  KEYWORD(nnan);
  KEYWORD(ninf);
  KEYWORD(nsz);
  KEYWORD(arcp);
  KEYWORD(contract);
  KEYWORD(reassoc);
  KEYWORD(afn);
  KEYWORD(fast);
  KEYWORD(nuw);
  KEYWORD(nsw);
  KEYWORD(nusw);
  KEYWORD(exact);
  KEYWORD(disjoint);
  KEYWORD(inbounds);
  KEYWORD(nneg);
  KEYWORD(inrange);
  KEYWORD(addrspace);
  KEYWORD(section);
  KEYWORD(partition);
  KEYWORD(code_model);
  KEYWORD(alias);
  KEYWORD(ifunc);
  KEYWORD(module);
  KEYWORD(asm);
  KEYWORD(sideeffect);
  KEYWORD(inteldialect);
  KEYWORD(gc);
  KEYWORD(prefix);
  KEYWORD(prologue);

  KEYWORD(no_sanitize_address);
  KEYWORD(no_sanitize_hwaddress);
  KEYWORD(sanitize_address_dyninit);

  KEYWORD(ccc);
  KEYWORD(fastcc);
  KEYWORD(coldcc);
  KEYWORD(cfguard_checkcc);
  KEYWORD(x86_stdcallcc);
  KEYWORD(x86_fastcallcc);
  KEYWORD(x86_thiscallcc);
  KEYWORD(x86_vectorcallcc);
  KEYWORD(arm_apcscc);
  KEYWORD(arm_aapcscc);
  KEYWORD(arm_aapcs_vfpcc);
  KEYWORD(aarch64_vector_pcs);
  KEYWORD(aarch64_sve_vector_pcs);
  KEYWORD(aarch64_sme_preservemost_from_x0);
  KEYWORD(aarch64_sme_preservemost_from_x1);
  KEYWORD(aarch64_sme_preservemost_from_x2);
  KEYWORD(msp430_intrcc);
  KEYWORD(avr_intrcc);
  KEYWORD(avr_signalcc);
  KEYWORD(ptx_kernel);
  KEYWORD(ptx_device);
  KEYWORD(spir_kernel);
  KEYWORD(spir_func);
  KEYWORD(intel_ocl_bicc);
  KEYWORD(x86_64_sysvcc);
  KEYWORD(win64cc);
  KEYWORD(x86_regcallcc);
  KEYWORD(swiftcc);
  KEYWORD(swifttailcc);
  KEYWORD(anyregcc);
  KEYWORD(preserve_mostcc);
  KEYWORD(preserve_allcc);
  KEYWORD(preserve_nonecc);
  KEYWORD(ghccc);
  KEYWORD(x86_intrcc);
  KEYWORD(hhvmcc);
  KEYWORD(hhvm_ccc);
  KEYWORD(cxx_fast_tlscc);
  KEYWORD(amdgpu_vs);
  KEYWORD(amdgpu_ls);
  KEYWORD(amdgpu_hs);
  KEYWORD(amdgpu_es);
  KEYWORD(amdgpu_gs);
  KEYWORD(amdgpu_ps);
  KEYWORD(amdgpu_cs);
  KEYWORD(amdgpu_cs_chain);
  KEYWORD(amdgpu_cs_chain_preserve);
  KEYWORD(amdgpu_kernel);
  KEYWORD(amdgpu_gfx);
  KEYWORD(tailcc);
  KEYWORD(m68k_rtdcc);
  KEYWORD(graalcc);
  KEYWORD(riscv_vector_cc);

  KEYWORD(cc);
  KEYWORD(c);

  KEYWORD(attributes);
  KEYWORD(sync);
  KEYWORD(async);

// Expand one KEYWORD check per ATTRIBUTE_ENUM entry of the included
// Attributes.inc, using the entry's display name.
#define GET_ATTR_NAMES
#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \
  KEYWORD(DISPLAY_NAME);
#include "llvm/IR/Attributes.inc"

  KEYWORD(read);
  KEYWORD(write);
  KEYWORD(readwrite);
  KEYWORD(argmem);
  KEYWORD(inaccessiblemem);
  KEYWORD(argmemonly);
  KEYWORD(inaccessiblememonly);
  KEYWORD(inaccessiblemem_or_argmemonly);

  // nofpclass attribute
  KEYWORD(all);
  KEYWORD(nan);
  KEYWORD(snan);
  KEYWORD(qnan);
  KEYWORD(inf);
  // ninf already a keyword
  KEYWORD(pinf);
  KEYWORD(norm);
  KEYWORD(nnorm);
  KEYWORD(pnorm);
  // sub already a keyword
  KEYWORD(nsub);
  KEYWORD(psub);
  KEYWORD(zero);
  KEYWORD(nzero);
  KEYWORD(pzero);

  KEYWORD(type);
  KEYWORD(opaque);

  KEYWORD(comdat);

  // Comdat types
  KEYWORD(any);
  KEYWORD(exactmatch);
  KEYWORD(largest);
  KEYWORD(nodeduplicate);
  KEYWORD(samesize);

  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);

  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
  KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
  KEYWORD(uinc_wrap);
  KEYWORD(udec_wrap);

  KEYWORD(splat);
  KEYWORD(vscale);
  KEYWORD(x);
  KEYWORD(blockaddress);
  KEYWORD(dso_local_equivalent);
  KEYWORD(no_cfi);
  KEYWORD(ptrauth);

  // Metadata types.
  KEYWORD(distinct);

  // Use-list order directives.
  KEYWORD(uselistorder);
  KEYWORD(uselistorder_bb);

  KEYWORD(personality);
  KEYWORD(cleanup);
  KEYWORD(catch);
  KEYWORD(filter);

  // Summary index keywords.
  KEYWORD(path);
  KEYWORD(hash);
  KEYWORD(gv);
  KEYWORD(guid);
  KEYWORD(name);
  KEYWORD(summaries);
  KEYWORD(flags);
  KEYWORD(blockcount);
  KEYWORD(linkage);
  KEYWORD(visibility);
  KEYWORD(notEligibleToImport);
  KEYWORD(live);
  KEYWORD(dsoLocal);
  KEYWORD(canAutoHide);
  KEYWORD(importType);
  KEYWORD(definition);
  KEYWORD(declaration);
  KEYWORD(function);
  KEYWORD(insts);
  KEYWORD(funcFlags);
  KEYWORD(readNone);
  KEYWORD(readOnly);
  KEYWORD(noRecurse);
  KEYWORD(returnDoesNotAlias);
  KEYWORD(noInline);
  KEYWORD(alwaysInline);
  KEYWORD(noUnwind);
  KEYWORD(mayThrow);
  KEYWORD(hasUnknownCall);
  KEYWORD(mustBeUnreachable);
  KEYWORD(calls);
  KEYWORD(callee);
  KEYWORD(params);
  KEYWORD(param);
  KEYWORD(hotness);
  KEYWORD(unknown);
  KEYWORD(critical);
  KEYWORD(relbf);
  KEYWORD(variable);
  KEYWORD(vTableFuncs);
  KEYWORD(virtFunc);
  KEYWORD(aliasee);
  KEYWORD(refs);
  KEYWORD(typeIdInfo);
  KEYWORD(typeTests);
  KEYWORD(typeTestAssumeVCalls);
  KEYWORD(typeCheckedLoadVCalls);
  KEYWORD(typeTestAssumeConstVCalls);
  KEYWORD(typeCheckedLoadConstVCalls);
  KEYWORD(vFuncId);
  KEYWORD(offset);
  KEYWORD(args);
  KEYWORD(typeid);
  KEYWORD(typeidCompatibleVTable);
  KEYWORD(summary);
  KEYWORD(typeTestRes);
  KEYWORD(kind);
  KEYWORD(unsat);
  KEYWORD(byteArray);
  KEYWORD(inline);
  KEYWORD(single);
  KEYWORD(allOnes);
  KEYWORD(sizeM1BitWidth);
  KEYWORD(alignLog2);
  KEYWORD(sizeM1);
  KEYWORD(bitMask);
  KEYWORD(inlineBits);
  KEYWORD(vcall_visibility);
  KEYWORD(wpdResolutions);
  KEYWORD(wpdRes);
  KEYWORD(indir);
  KEYWORD(singleImpl);
  KEYWORD(branchFunnel);
  KEYWORD(singleImplName);
  KEYWORD(resByArg);
  KEYWORD(byArg);
  KEYWORD(uniformRetVal);
  KEYWORD(uniqueRetVal);
  KEYWORD(virtualConstProp);
  KEYWORD(info);
  KEYWORD(byte);
  KEYWORD(bit);
  KEYWORD(varFlags);
  KEYWORD(callsites);
  KEYWORD(clones);
  KEYWORD(stackIds);
  KEYWORD(allocs);
  KEYWORD(versions);
  KEYWORD(memProf);
  KEYWORD(notcold);

#undef KEYWORD

  // Keywords for types.
  // On a match the type is stored in TyVal and the Type token is returned.
#define TYPEKEYWORD(STR, LLVMTY)                                               \
  do {                                                                         \
    if (Keyword == STR) {                                                      \
      TyVal = LLVMTY;                                                          \
      return lltok::Type;                                                      \
    }                                                                          \
  } while (false)

  TYPEKEYWORD("void",      Type::getVoidTy(Context));
  TYPEKEYWORD("half",      Type::getHalfTy(Context));
  TYPEKEYWORD("bfloat",    Type::getBFloatTy(Context));
  TYPEKEYWORD("float",     Type::getFloatTy(Context));
  TYPEKEYWORD("double",    Type::getDoubleTy(Context));
  TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
  TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
  TYPEKEYWORD("label",     Type::getLabelTy(Context));
  TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
  TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
  TYPEKEYWORD("x86_amx",   Type::getX86_AMXTy(Context));
  TYPEKEYWORD("token",     Type::getTokenTy(Context));
  TYPEKEYWORD("ptr",       PointerType::getUnqual(Context));

#undef TYPEKEYWORD

  // Keywords for instructions.
  // On a match the opcode is stored in UIntVal and lltok::kw_<STR> is returned.
#define INSTKEYWORD(STR, Enum)                                                 \
  do {                                                                         \
    if (Keyword == #STR) {                                                     \
      UIntVal = Instruction::Enum;                                             \
      return lltok::kw_##STR;                                                  \
    }                                                                          \
  } while (false)

  INSTKEYWORD(fneg,  FNeg);

  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);

  INSTKEYWORD(phi,         PHI);
  INSTKEYWORD(call,        Call);
  INSTKEYWORD(trunc,       Trunc);
  INSTKEYWORD(zext,        ZExt);
  INSTKEYWORD(sext,        SExt);
  INSTKEYWORD(fptrunc,     FPTrunc);
  INSTKEYWORD(fpext,       FPExt);
  INSTKEYWORD(uitofp,      UIToFP);
  INSTKEYWORD(sitofp,      SIToFP);
  INSTKEYWORD(fptoui,      FPToUI);
  INSTKEYWORD(fptosi,      FPToSI);
  INSTKEYWORD(inttoptr,    IntToPtr);
  INSTKEYWORD(ptrtoint,    PtrToInt);
  INSTKEYWORD(bitcast,     BitCast);
  INSTKEYWORD(addrspacecast, AddrSpaceCast);
  INSTKEYWORD(select,      Select);
  INSTKEYWORD(va_arg,      VAArg);
  INSTKEYWORD(ret,         Ret);
  INSTKEYWORD(br,          Br);
  INSTKEYWORD(switch,      Switch);
  INSTKEYWORD(indirectbr,  IndirectBr);
  INSTKEYWORD(invoke,      Invoke);
  INSTKEYWORD(resume,      Resume);
  INSTKEYWORD(unreachable, Unreachable);
  INSTKEYWORD(callbr,      CallBr);

  INSTKEYWORD(alloca,      Alloca);
  INSTKEYWORD(load,        Load);
  INSTKEYWORD(store,       Store);
  INSTKEYWORD(cmpxchg,     AtomicCmpXchg);
  INSTKEYWORD(atomicrmw,   AtomicRMW);
  INSTKEYWORD(fence,       Fence);
  INSTKEYWORD(getelementptr, GetElementPtr);

  INSTKEYWORD(extractelement, ExtractElement);
  INSTKEYWORD(insertelement,  InsertElement);
  INSTKEYWORD(shufflevector,  ShuffleVector);
  INSTKEYWORD(extractvalue,   ExtractValue);
  INSTKEYWORD(insertvalue,    InsertValue);
  INSTKEYWORD(landingpad,     LandingPad);
  INSTKEYWORD(cleanupret,     CleanupRet);
  INSTKEYWORD(catchret,       CatchRet);
  INSTKEYWORD(catchswitch,    CatchSwitch);
  INSTKEYWORD(catchpad,       CatchPad);
  INSTKEYWORD(cleanuppad,     CleanupPad);

  INSTKEYWORD(freeze,         Freeze);

#undef INSTKEYWORD

// Any identifier starting with "DW_<TYPE>_" is returned as token TOKEN, with
// the full identifier text stored in StrVal.
#define DWKEYWORD(TYPE, TOKEN)                                                 \
  do {                                                                         \
    if (Keyword.starts_with("DW_" #TYPE "_")) {                                \
      StrVal.assign(Keyword.begin(), Keyword.end());                           \
      return lltok::TOKEN;                                                     \
    }                                                                          \
  } while (false)

  DWKEYWORD(TAG, DwarfTag);
  DWKEYWORD(ATE, DwarfAttEncoding);
  DWKEYWORD(VIRTUALITY, DwarfVirtuality);
  DWKEYWORD(LANG, DwarfLang);
  DWKEYWORD(CC, DwarfCC);
  DWKEYWORD(OP, DwarfOp);
  DWKEYWORD(MACINFO, DwarfMacinfo);

#undef DWKEYWORD

// Keywords for debug record types.
// "dbg_<STR>" is returned as DbgRecordType with StrVal set to STR (without the
// "dbg_" prefix).
#define DBGRECORDTYPEKEYWORD(STR)                                              \
  do {                                                                         \
    if (Keyword == "dbg_" #STR) {                                              \
      StrVal = #STR;                                                           \
      return lltok::DbgRecordType;                                             \
    }                                                                          \
  } while (false)

  DBGRECORDTYPEKEYWORD(value);
  DBGRECORDTYPEKEYWORD(declare);
  DBGRECORDTYPEKEYWORD(assign);
  DBGRECORDTYPEKEYWORD(label);
#undef DBGRECORDTYPEKEYWORD

  // The remaining prefix/name matches all store the identifier text in StrVal.
  if (Keyword.starts_with("DIFlag")) {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::DIFlag;
  }

  if (Keyword.starts_with("DISPFlag")) {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::DISPFlag;
  }

  if (Keyword.starts_with("CSK_")) {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::ChecksumKind;
  }

  if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
      Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly") {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::EmissionKind;
  }

  if (Keyword == "GNU" || Keyword == "Apple" || Keyword == "None" ||
      Keyword == "Default") {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::NameTableKind;
  }

  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
  // the CFE to avoid forcing it to deal with 64-bit numbers.
  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
      TokStart[1] == '0' && TokStart[2] == 'x' &&
      isxdigit(static_cast<unsigned char>(TokStart[3]))) {
    int len = CurPtr-TokStart-3;
    uint32_t bits = len * 4;
    StringRef HexStr(TokStart + 3, len);
    if (!all_of(HexStr, isxdigit)) {
      // Bad token, return it as an error.
      CurPtr = TokStart+3;
      return lltok::Error;
    }
    APInt Tmp(bits, HexStr, 16);
    // Drop leading zero bits, but keep at least the bits that are in use.
    uint32_t activeBits = Tmp.getActiveBits();
    if (activeBits > 0 && activeBits < bits)
      Tmp = Tmp.trunc(activeBits);
    APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
    return lltok::APSInt;
  }

  // If this is "cc1234", return this as just "cc".
  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
    CurPtr = TokStart+2;
    return lltok::kw_cc;
  }

  // Finally, if this isn't known, return an error.
  CurPtr = TokStart+1;
  return lltok::Error;
}

/// Lex all tokens that start with a 0x prefix, knowing
they match and are not1010/// labels.1011/// HexFPConstant 0x[0-9A-Fa-f]+1012/// HexFP80Constant 0xK[0-9A-Fa-f]+1013/// HexFP128Constant 0xL[0-9A-Fa-f]+1014/// HexPPC128Constant 0xM[0-9A-Fa-f]+1015/// HexHalfConstant 0xH[0-9A-Fa-f]+1016/// HexBFloatConstant 0xR[0-9A-Fa-f]+1017lltok::Kind LLLexer::Lex0x() {1018CurPtr = TokStart + 2;10191020char Kind;1021if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' ||1022CurPtr[0] == 'R') {1023Kind = *CurPtr++;1024} else {1025Kind = 'J';1026}10271028if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {1029// Bad token, return it as an error.1030CurPtr = TokStart+1;1031return lltok::Error;1032}10331034while (isxdigit(static_cast<unsigned char>(CurPtr[0])))1035++CurPtr;10361037if (Kind == 'J') {1038// HexFPConstant - Floating point constant represented in IEEE format as a1039// hexadecimal number for when exponential notation is not precise enough.1040// Half, BFloat, Float, and double only.1041APFloatVal = APFloat(APFloat::IEEEdouble(),1042APInt(64, HexIntToVal(TokStart + 2, CurPtr)));1043return lltok::APFloat;1044}10451046uint64_t Pair[2];1047switch (Kind) {1048default: llvm_unreachable("Unknown kind!");1049case 'K':1050// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)1051FP80HexToIntPair(TokStart+3, CurPtr, Pair);1052APFloatVal = APFloat(APFloat::x87DoubleExtended(), APInt(80, Pair));1053return lltok::APFloat;1054case 'L':1055// F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)1056HexToIntPair(TokStart+3, CurPtr, Pair);1057APFloatVal = APFloat(APFloat::IEEEquad(), APInt(128, Pair));1058return lltok::APFloat;1059case 'M':1060// PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)1061HexToIntPair(TokStart+3, CurPtr, Pair);1062APFloatVal = APFloat(APFloat::PPCDoubleDouble(), APInt(128, Pair));1063return lltok::APFloat;1064case 'H':1065APFloatVal = APFloat(APFloat::IEEEhalf(),1066APInt(16,HexIntToVal(TokStart+3, CurPtr)));1067return lltok::APFloat;1068case 
'R':1069// Brain floating point1070APFloatVal = APFloat(APFloat::BFloat(),1071APInt(16, HexIntToVal(TokStart + 3, CurPtr)));1072return lltok::APFloat;1073}1074}10751076/// Lex tokens for a label or a numeric constant, possibly starting with -.1077/// Label [-a-zA-Z$._0-9]+:1078/// NInteger -[0-9]+1079/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?1080/// PInteger [0-9]+1081/// HexFPConstant 0x[0-9A-Fa-f]+1082/// HexFP80Constant 0xK[0-9A-Fa-f]+1083/// HexFP128Constant 0xL[0-9A-Fa-f]+1084/// HexPPC128Constant 0xM[0-9A-Fa-f]+1085lltok::Kind LLLexer::LexDigitOrNegative() {1086// If the letter after the negative is not a number, this is probably a label.1087if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&1088!isdigit(static_cast<unsigned char>(CurPtr[0]))) {1089// Okay, this is not a number after the -, it's probably a label.1090if (const char *End = isLabelTail(CurPtr)) {1091StrVal.assign(TokStart, End-1);1092CurPtr = End;1093return lltok::LabelStr;1094}10951096return lltok::Error;1097}10981099// At this point, it is either a label, int or fp constant.11001101// Skip digits, we have at least one.1102for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)1103/*empty*/;11041105// Check if this is a fully-numeric label:1106if (isdigit(TokStart[0]) && CurPtr[0] == ':') {1107uint64_t Val = atoull(TokStart, CurPtr);1108++CurPtr; // Skip the colon.1109if ((unsigned)Val != Val)1110Error("invalid value number (too large)!");1111UIntVal = unsigned(Val);1112return lltok::LabelID;1113}11141115// Check to see if this really is a string label, e.g. 
"-1:".1116if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {1117if (const char *End = isLabelTail(CurPtr)) {1118StrVal.assign(TokStart, End-1);1119CurPtr = End;1120return lltok::LabelStr;1121}1122}11231124// If the next character is a '.', then it is a fp value, otherwise its1125// integer.1126if (CurPtr[0] != '.') {1127if (TokStart[0] == '0' && TokStart[1] == 'x')1128return Lex0x();1129APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));1130return lltok::APSInt;1131}11321133++CurPtr;11341135// Skip over [0-9]*([eE][-+]?[0-9]+)?1136while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;11371138if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {1139if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||1140((CurPtr[1] == '-' || CurPtr[1] == '+') &&1141isdigit(static_cast<unsigned char>(CurPtr[2])))) {1142CurPtr += 2;1143while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;1144}1145}11461147APFloatVal = APFloat(APFloat::IEEEdouble(),1148StringRef(TokStart, CurPtr - TokStart));1149return lltok::APFloat;1150}11511152/// Lex a floating point constant starting with +.1153/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?1154lltok::Kind LLLexer::LexPositive() {1155// If the letter after the negative is a number, this is probably not a1156// label.1157if (!isdigit(static_cast<unsigned char>(CurPtr[0])))1158return lltok::Error;11591160// Skip digits.1161for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)1162/*empty*/;11631164// At this point, we need a '.'.1165if (CurPtr[0] != '.') {1166CurPtr = TokStart+1;1167return lltok::Error;1168}11691170++CurPtr;11711172// Skip over [0-9]*([eE][-+]?[0-9]+)?1173while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;11741175if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {1176if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||1177((CurPtr[1] == '-' || CurPtr[1] == '+') &&1178isdigit(static_cast<unsigned char>(CurPtr[2])))) {1179CurPtr += 2;1180while (isdigit(static_cast<unsigned 
char>(CurPtr[0]))) ++CurPtr;1181}1182}11831184APFloatVal = APFloat(APFloat::IEEEdouble(),1185StringRef(TokStart, CurPtr - TokStart));1186return lltok::APFloat;1187}118811891190