Path: blob/main/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h
35233 views
//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// This file contains FormatTokenLexer, which tokenizes a source file10/// into a token stream suitable for ClangFormat.11///12//===----------------------------------------------------------------------===//1314#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H15#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H1617#include "Encoding.h"18#include "FormatToken.h"19#include "llvm/ADT/MapVector.h"20#include "llvm/ADT/SmallPtrSet.h"21#include "llvm/ADT/StringSet.h"2223#include <stack>2425namespace clang {26namespace format {2728enum LexerState {29NORMAL,30TEMPLATE_STRING,31TOKEN_STASHED,32};3334class FormatTokenLexer {35public:36FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,37const FormatStyle &Style, encoding::Encoding Encoding,38llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,39IdentifierTable &IdentTable);4041ArrayRef<FormatToken *> lex();4243const AdditionalKeywords &getKeywords() { return Keywords; }4445private:46void tryMergePreviousTokens();4748bool tryMergeLessLess();49bool tryMergeGreaterGreater();50bool tryMergeNSStringLiteral();51bool tryMergeJSPrivateIdentifier();52bool tryMergeCSharpStringLiteral();53bool tryMergeCSharpKeywordVariables();54bool tryMergeNullishCoalescingEqual();55bool tryTransformCSharpForEach();56bool tryMergeForEach();57bool tryTransformTryUsageForC();5859// Merge the most recently lexed tokens into a single token if their kinds are60// correct.61bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);62// Merge without checking their kinds.63bool tryMergeTokens(size_t Count, TokenType NewType);64// Merge if their kinds match any one of Kinds.65bool tryMergeTokensAny(ArrayRef<ArrayRef<tok::TokenKind>> Kinds,66TokenType NewType);6768// Returns \c true if \p Tok can only be followed by an operand in JavaScript.69bool precedesOperand(FormatToken *Tok);7071bool canPrecedeRegexLiteral(FormatToken *Prev);7273// Tries to parse a JavaScript Regex literal starting at the current token,74// if that begins with a slash and is in a location where JavaScript allows75// regex literals. Changes the current token to a regex literal and updates76// its text if successful.77void tryParseJSRegexLiteral();7879// Handles JavaScript template strings.80//81// JavaScript template strings use backticks ('`') as delimiters, and allow82// embedding expressions nested in ${expr-here}. Template strings can be83// nested recursively, i.e. expressions can contain template strings in turn.84//85// The code below parses starting from a backtick, up to a closing backtick or86// an opening ${. It also maintains a stack of lexing contexts to handle87// nested template parts by balancing curly braces.88void handleTemplateStrings();8990void handleCSharpVerbatimAndInterpolatedStrings();9192// Handles TableGen multiline strings. It has the form [{ ... }].93void handleTableGenMultilineString();94// Handles TableGen numeric like identifiers.95// They have a forms of [0-9]*[_a-zA-Z]([_a-zA-Z0-9]*). But limited to the96// case it is not lexed as an integer.97void handleTableGenNumericLikeIdentifier();9899void tryParsePythonComment();100101bool tryMerge_TMacro();102103bool tryMergeConflictMarkers();104105void truncateToken(size_t NewLen);106107FormatToken *getStashedToken();108109FormatToken *getNextToken();110111FormatToken *FormatTok;112bool IsFirstToken;113std::stack<LexerState> StateStack;114unsigned Column;115unsigned TrailingWhitespace;116std::unique_ptr<Lexer> Lex;117LangOptions LangOpts;118const SourceManager &SourceMgr;119FileID ID;120const FormatStyle &Style;121IdentifierTable &IdentTable;122AdditionalKeywords Keywords;123encoding::Encoding Encoding;124llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;125// Index (in 'Tokens') of the last token that starts a new line.126unsigned FirstInLineIndex;127SmallVector<FormatToken *, 16> Tokens;128129llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;130131llvm::SmallPtrSet<IdentifierInfo *, 8> TypeNames;132133bool FormattingDisabled;134135llvm::Regex MacroBlockBeginRegex;136llvm::Regex MacroBlockEndRegex;137138// Targets that may appear inside a C# attribute.139static const llvm::StringSet<> CSharpAttributeTargets;140141/// Handle Verilog-specific tokens.142bool readRawTokenVerilogSpecific(Token &Tok);143144void readRawToken(FormatToken &Tok);145146void resetLexer(unsigned Offset);147};148149} // namespace format150} // namespace clang151152#endif153154155