Path: blob/main/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp
35266 views
//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// This file defines the log symbolizer markup data model and parser.10///11//===----------------------------------------------------------------------===//1213#include "llvm/DebugInfo/Symbolize/Markup.h"1415#include "llvm/ADT/STLExtras.h"16#include "llvm/ADT/StringExtras.h"1718namespace llvm {19namespace symbolize {2021// Matches the following:22// "\033[0m"23// "\033[1m"24// "\033[30m" -- "\033[37m"25static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";2627MarkupParser::MarkupParser(StringSet<> MultilineTags)28: MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}2930static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {31return Str.take_front(Pos - Str.begin());32}33static void advanceTo(StringRef &Str, StringRef::iterator Pos) {34Str = Str.drop_front(Pos - Str.begin());35}3637void MarkupParser::parseLine(StringRef Line) {38Buffer.clear();39NextIdx = 0;40FinishedMultiline.clear();41this->Line = Line;42}4344std::optional<MarkupNode> MarkupParser::nextNode() {45// Pull something out of the buffer if possible.46if (!Buffer.empty()) {47if (NextIdx < Buffer.size())48return std::move(Buffer[NextIdx++]);49NextIdx = 0;50Buffer.clear();51}5253// The buffer is empty, so parse the next bit of the line.5455if (Line.empty())56return std::nullopt;5758if (!InProgressMultiline.empty()) {59if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {60llvm::append_range(InProgressMultiline, *MultilineEnd);61assert(FinishedMultiline.empty() &&62"At most one multi-line element can be finished at a time.");63FinishedMultiline.swap(InProgressMultiline);64// Parse the multi-line element as if it were contiguous.65advanceTo(Line, MultilineEnd->end());66return *parseElement(FinishedMultiline);67}6869// The whole line is part of the multi-line element.70llvm::append_range(InProgressMultiline, Line);71Line = Line.drop_front(Line.size());72return std::nullopt;73}7475// Find the first valid markup element, if any.76if (std::optional<MarkupNode> Element = parseElement(Line)) {77parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));78Buffer.push_back(std::move(*Element));79advanceTo(Line, Element->Text.end());80return nextNode();81}8283// Since there were no valid elements remaining, see if the line opens a84// multi-line element.85if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {86// Emit any text before the element.87parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));8889// Begin recording the multi-line element.90llvm::append_range(InProgressMultiline, *MultilineBegin);91Line = Line.drop_front(Line.size());92return nextNode();93}9495// The line doesn't contain any more markup elements, so emit it as text.96parseTextOutsideMarkup(Line);97Line = Line.drop_front(Line.size());98return nextNode();99}100101void MarkupParser::flush() {102Buffer.clear();103NextIdx = 0;104Line = {};105if (InProgressMultiline.empty())106return;107FinishedMultiline.swap(InProgressMultiline);108parseTextOutsideMarkup(FinishedMultiline);109}110111// Finds and returns the next valid markup element in the given line. Returns112// std::nullopt if the line contains no valid elements.113std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {114while (true) {115// Find next element using begin and end markers.116size_t BeginPos = Line.find("{{{");117if (BeginPos == StringRef::npos)118return std::nullopt;119size_t EndPos = Line.find("}}}", BeginPos + 3);120if (EndPos == StringRef::npos)121return std::nullopt;122EndPos += 3;123MarkupNode Element;124Element.Text = Line.slice(BeginPos, EndPos);125Line = Line.substr(EndPos);126127// Parse tag.128StringRef Content = Element.Text.drop_front(3).drop_back(3);129StringRef FieldsContent;130std::tie(Element.Tag, FieldsContent) = Content.split(':');131if (Element.Tag.empty())132continue;133134// Parse fields.135if (!FieldsContent.empty())136FieldsContent.split(Element.Fields, ":");137else if (Content.back() == ':')138Element.Fields.push_back(FieldsContent);139140return Element;141}142}143144static MarkupNode textNode(StringRef Text) {145MarkupNode Node;146Node.Text = Text;147return Node;148}149150// Parses a region of text known to be outside any markup elements. Such text151// may still contain SGR control codes, so the region is further subdivided into152// control codes and true text regions.153void MarkupParser::parseTextOutsideMarkup(StringRef Text) {154if (Text.empty())155return;156SmallVector<StringRef> Matches;157while (SGRSyntax.match(Text, &Matches)) {158// Emit any text before the SGR element.159if (Matches.begin()->begin() != Text.begin())160Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));161162Buffer.push_back(textNode(*Matches.begin()));163advanceTo(Text, Matches.begin()->end());164}165if (!Text.empty())166Buffer.push_back(textNode(Text));167}168169// Given that a line doesn't contain any valid markup, see if it ends with the170// start of a multi-line element. If so, returns the beginning.171std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {172// A multi-line begin marker must be the last one on the line.173size_t BeginPos = Line.rfind("{{{");174if (BeginPos == StringRef::npos)175return std::nullopt;176size_t BeginTagPos = BeginPos + 3;177178// If there are any end markers afterwards, the begin marker cannot belong to179// a multi-line element.180size_t EndPos = Line.find("}}}", BeginTagPos);181if (EndPos != StringRef::npos)182return std::nullopt;183184// Check whether the tag is registered multi-line.185size_t EndTagPos = Line.find(':', BeginTagPos);186if (EndTagPos == StringRef::npos)187return std::nullopt;188StringRef Tag = Line.slice(BeginTagPos, EndTagPos);189if (!MultilineTags.contains(Tag))190return std::nullopt;191return Line.substr(BeginPos);192}193194// See if the line begins with the ending of an in-progress multi-line element.195// If so, return the ending.196std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {197size_t EndPos = Line.find("}}}");198if (EndPos == StringRef::npos)199return std::nullopt;200return Line.take_front(EndPos + 3);201}202203} // end namespace symbolize204} // end namespace llvm205206207