Path: blob/main/contrib/llvm-project/lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp
39644 views
//===-- ClangHighlighter.cpp ----------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "ClangHighlighter.h"910#include "lldb/Host/FileSystem.h"11#include "lldb/Target/Language.h"12#include "lldb/Utility/AnsiTerminal.h"13#include "lldb/Utility/StreamString.h"1415#include "clang/Basic/FileManager.h"16#include "clang/Basic/SourceManager.h"17#include "clang/Lex/Lexer.h"18#include "llvm/ADT/StringSet.h"19#include "llvm/Support/MemoryBuffer.h"20#include <optional>2122using namespace lldb_private;2324bool ClangHighlighter::isKeyword(llvm::StringRef token) const {25return keywords.contains(token);26}2728ClangHighlighter::ClangHighlighter() {29#define KEYWORD(X, N) keywords.insert(#X);30#include "clang/Basic/TokenKinds.def"31}3233/// Determines which style should be applied to the given token.34/// \param highlighter35/// The current highlighter that should use the style.36/// \param token37/// The current token.38/// \param tok_str39/// The string in the source code the token represents.40/// \param options41/// The style we use for coloring the source code.42/// \param in_pp_directive43/// If we are currently in a preprocessor directive. NOTE: This is44/// passed by reference and will be updated if the current token starts45/// or ends a preprocessor directive.46/// \return47/// The ColorStyle that should be applied to the token.48static HighlightStyle::ColorStyle49determineClangStyle(const ClangHighlighter &highlighter,50const clang::Token &token, llvm::StringRef tok_str,51const HighlightStyle &options, bool &in_pp_directive) {52using namespace clang;5354if (token.is(tok::comment)) {55// If we were in a preprocessor directive before, we now left it.56in_pp_directive = false;57return options.comment;58} else if (in_pp_directive || token.getKind() == tok::hash) {59// Let's assume that the rest of the line is a PP directive.60in_pp_directive = true;61// Preprocessor directives are hard to match, so we have to hack this in.62return options.pp_directive;63} else if (tok::isStringLiteral(token.getKind()))64return options.string_literal;65else if (tok::isLiteral(token.getKind()))66return options.scalar_literal;67else if (highlighter.isKeyword(tok_str))68return options.keyword;69else70switch (token.getKind()) {71case tok::raw_identifier:72case tok::identifier:73return options.identifier;74case tok::l_brace:75case tok::r_brace:76return options.braces;77case tok::l_square:78case tok::r_square:79return options.square_brackets;80case tok::l_paren:81case tok::r_paren:82return options.parentheses;83case tok::comma:84return options.comma;85case tok::coloncolon:86case tok::colon:87return options.colon;8889case tok::amp:90case tok::ampamp:91case tok::ampequal:92case tok::star:93case tok::starequal:94case tok::plus:95case tok::plusplus:96case tok::plusequal:97case tok::minus:98case tok::arrow:99case tok::minusminus:100case tok::minusequal:101case tok::tilde:102case tok::exclaim:103case tok::exclaimequal:104case tok::slash:105case tok::slashequal:106case tok::percent:107case tok::percentequal:108case tok::less:109case tok::lessless:110case tok::lessequal:111case tok::lesslessequal:112case tok::spaceship:113case tok::greater:114case tok::greatergreater:115case tok::greaterequal:116case tok::greatergreaterequal:117case tok::caret:118case tok::caretequal:119case tok::pipe:120case tok::pipepipe:121case tok::pipeequal:122case tok::question:123case tok::equal:124case tok::equalequal:125return options.operators;126default:127break;128}129return HighlightStyle::ColorStyle();130}131132void ClangHighlighter::Highlight(const HighlightStyle &options,133llvm::StringRef line,134std::optional<size_t> cursor_pos,135llvm::StringRef previous_lines,136Stream &result) const {137using namespace clang;138139FileSystemOptions file_opts;140FileManager file_mgr(file_opts,141FileSystem::Instance().GetVirtualFileSystem());142143// The line might end in a backslash which would cause Clang to drop the144// backslash and the terminating new line. This makes sense when parsing C++,145// but when highlighting we care about preserving the backslash/newline. To146// not lose this information we remove the new line here so that Clang knows147// this is just a single line we are highlighting. We add back the newline148// after tokenizing.149llvm::StringRef line_ending = "";150// There are a few legal line endings Clang recognizes and we need to151// temporarily remove from the string.152if (line.consume_back("\r\n"))153line_ending = "\r\n";154else if (line.consume_back("\n"))155line_ending = "\n";156else if (line.consume_back("\r"))157line_ending = "\r";158159unsigned line_number = previous_lines.count('\n') + 1U;160161// Let's build the actual source code Clang needs and setup some utility162// objects.163std::string full_source = previous_lines.str() + line.str();164llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());165llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(166new DiagnosticOptions());167DiagnosticsEngine diags(diag_ids, diags_opts);168clang::SourceManager SM(diags, file_mgr);169auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);170171FileID FID = SM.createFileID(buf->getMemBufferRef());172173// Let's just enable the latest ObjC and C++ which should get most tokens174// right.175LangOptions Opts;176Opts.ObjC = true;177// FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too178Opts.CPlusPlus17 = true;179Opts.LineComment = true;180181Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);182// The lexer should keep whitespace around.183lex.SetKeepWhitespaceMode(true);184185// Keeps track if we have entered a PP directive.186bool in_pp_directive = false;187188// True once we actually lexed the user provided line.189bool found_user_line = false;190191// True if we already highlighted the token under the cursor, false otherwise.192bool highlighted_cursor = false;193Token token;194bool exit = false;195while (!exit) {196// Returns true if this is the last token we get from the lexer.197exit = lex.LexFromRawLexer(token);198199bool invalid = false;200unsigned current_line_number =201SM.getSpellingLineNumber(token.getLocation(), &invalid);202if (current_line_number != line_number)203continue;204found_user_line = true;205206// We don't need to print any tokens without a spelling line number.207if (invalid)208continue;209210// Same as above but with the column number.211invalid = false;212unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);213if (invalid)214continue;215// Column numbers start at 1, but indexes in our string start at 0.216--start;217218// Annotations don't have a length, so let's skip them.219if (token.isAnnotation())220continue;221222// Extract the token string from our source code.223llvm::StringRef tok_str = line.substr(start, token.getLength());224225// If the token is just an empty string, we can skip all the work below.226if (tok_str.empty())227continue;228229// If the cursor is inside this token, we have to apply the 'selected'230// highlight style before applying the actual token color.231llvm::StringRef to_print = tok_str;232StreamString storage;233auto end = start + token.getLength();234if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {235highlighted_cursor = true;236options.selected.Apply(storage, tok_str);237to_print = storage.GetString();238}239240// See how we are supposed to highlight this token.241HighlightStyle::ColorStyle color =242determineClangStyle(*this, token, tok_str, options, in_pp_directive);243244color.Apply(result, to_print);245}246247// Add the line ending we trimmed before tokenizing.248result << line_ending;249250// If we went over the whole file but couldn't find our own file, then251// somehow our setup was wrong. When we're in release mode we just give the252// user the normal line and pretend we don't know how to highlight it. In253// debug mode we bail out with an assert as this should never happen.254if (!found_user_line) {255result << line;256assert(false && "We couldn't find the user line in the input file?");257}258}259260261