Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp
35266 views
1
//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file defines the log symbolizer markup data model and parser.
11
///
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/DebugInfo/Symbolize/Markup.h"
15
16
#include "llvm/ADT/STLExtras.h"
17
#include "llvm/ADT/StringExtras.h"
18
19
namespace llvm {
20
namespace symbolize {
21
22
// Regular expression for the SGR (Select Graphic Rendition) control codes that
// are recognized inside text:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
27
28
// Constructs a parser. MultilineTags is the set of tags that are looked up when
// deciding whether an unterminated element at the end of a line opens a
// multi-line element. The SGR matcher is compiled once from SGRSyntaxStr.
MarkupParser::MarkupParser(StringSet<> MultilineTags)
    : MultilineTags(std::move(MultilineTags)),
      SGRSyntax(SGRSyntaxStr) {
}
30
31
// Returns the prefix of Str that ends just before Pos. Pos is expected to be an
// iterator into Str.
static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
  const size_t PrefixLen = Pos - Str.begin();
  return Str.take_front(PrefixLen);
}
34
// Drops everything in front of Pos from Str, so that Str begins at Pos. Pos is
// expected to be an iterator into Str.
static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
  const size_t Consumed = Pos - Str.begin();
  Str = Str.drop_front(Consumed);
}
37
38
// Starts parsing a new line. Any nodes still buffered from the previous line
// and the storage of a previously finished multi-line element are discarded;
// an in-progress multi-line element is left untouched so it can continue on
// this line.
void MarkupParser::parseLine(StringRef Line) {
  this->Line = Line;

  // Reset the per-line node buffer and its read cursor.
  NextIdx = 0;
  Buffer.clear();

  FinishedMultiline.clear();
}
44
45
std::optional<MarkupNode> MarkupParser::nextNode() {
46
// Pull something out of the buffer if possible.
47
if (!Buffer.empty()) {
48
if (NextIdx < Buffer.size())
49
return std::move(Buffer[NextIdx++]);
50
NextIdx = 0;
51
Buffer.clear();
52
}
53
54
// The buffer is empty, so parse the next bit of the line.
55
56
if (Line.empty())
57
return std::nullopt;
58
59
if (!InProgressMultiline.empty()) {
60
if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
61
llvm::append_range(InProgressMultiline, *MultilineEnd);
62
assert(FinishedMultiline.empty() &&
63
"At most one multi-line element can be finished at a time.");
64
FinishedMultiline.swap(InProgressMultiline);
65
// Parse the multi-line element as if it were contiguous.
66
advanceTo(Line, MultilineEnd->end());
67
return *parseElement(FinishedMultiline);
68
}
69
70
// The whole line is part of the multi-line element.
71
llvm::append_range(InProgressMultiline, Line);
72
Line = Line.drop_front(Line.size());
73
return std::nullopt;
74
}
75
76
// Find the first valid markup element, if any.
77
if (std::optional<MarkupNode> Element = parseElement(Line)) {
78
parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
79
Buffer.push_back(std::move(*Element));
80
advanceTo(Line, Element->Text.end());
81
return nextNode();
82
}
83
84
// Since there were no valid elements remaining, see if the line opens a
85
// multi-line element.
86
if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
87
// Emit any text before the element.
88
parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
89
90
// Begin recording the multi-line element.
91
llvm::append_range(InProgressMultiline, *MultilineBegin);
92
Line = Line.drop_front(Line.size());
93
return nextNode();
94
}
95
96
// The line doesn't contain any more markup elements, so emit it as text.
97
parseTextOutsideMarkup(Line);
98
Line = Line.drop_front(Line.size());
99
return nextNode();
100
}
101
102
void MarkupParser::flush() {
103
Buffer.clear();
104
NextIdx = 0;
105
Line = {};
106
if (InProgressMultiline.empty())
107
return;
108
FinishedMultiline.swap(InProgressMultiline);
109
parseTextOutsideMarkup(FinishedMultiline);
110
}
111
112
// Finds and returns the next valid markup element in the given line. Returns
113
// std::nullopt if the line contains no valid elements.
114
std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
115
while (true) {
116
// Find next element using begin and end markers.
117
size_t BeginPos = Line.find("{{{");
118
if (BeginPos == StringRef::npos)
119
return std::nullopt;
120
size_t EndPos = Line.find("}}}", BeginPos + 3);
121
if (EndPos == StringRef::npos)
122
return std::nullopt;
123
EndPos += 3;
124
MarkupNode Element;
125
Element.Text = Line.slice(BeginPos, EndPos);
126
Line = Line.substr(EndPos);
127
128
// Parse tag.
129
StringRef Content = Element.Text.drop_front(3).drop_back(3);
130
StringRef FieldsContent;
131
std::tie(Element.Tag, FieldsContent) = Content.split(':');
132
if (Element.Tag.empty())
133
continue;
134
135
// Parse fields.
136
if (!FieldsContent.empty())
137
FieldsContent.split(Element.Fields, ":");
138
else if (Content.back() == ':')
139
Element.Fields.push_back(FieldsContent);
140
141
return Element;
142
}
143
}
144
145
// Builds a node that carries only text; the tag and fields keep their default
// (empty) state.
static MarkupNode textNode(StringRef Text) {
  MarkupNode Result;
  Result.Text = Text;
  return Result;
}
150
151
// Parses a region of text known to be outside any markup elements. Such text
152
// may still contain SGR control codes, so the region is further subdivided into
153
// control codes and true text regions.
154
void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
155
if (Text.empty())
156
return;
157
SmallVector<StringRef> Matches;
158
while (SGRSyntax.match(Text, &Matches)) {
159
// Emit any text before the SGR element.
160
if (Matches.begin()->begin() != Text.begin())
161
Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
162
163
Buffer.push_back(textNode(*Matches.begin()));
164
advanceTo(Text, Matches.begin()->end());
165
}
166
if (!Text.empty())
167
Buffer.push_back(textNode(Text));
168
}
169
170
// Given that a line doesn't contain any valid markup, see if it ends with the
171
// start of a multi-line element. If so, returns the beginning.
172
std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
173
// A multi-line begin marker must be the last one on the line.
174
size_t BeginPos = Line.rfind("{{{");
175
if (BeginPos == StringRef::npos)
176
return std::nullopt;
177
size_t BeginTagPos = BeginPos + 3;
178
179
// If there are any end markers afterwards, the begin marker cannot belong to
180
// a multi-line element.
181
size_t EndPos = Line.find("}}}", BeginTagPos);
182
if (EndPos != StringRef::npos)
183
return std::nullopt;
184
185
// Check whether the tag is registered multi-line.
186
size_t EndTagPos = Line.find(':', BeginTagPos);
187
if (EndTagPos == StringRef::npos)
188
return std::nullopt;
189
StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
190
if (!MultilineTags.contains(Tag))
191
return std::nullopt;
192
return Line.substr(BeginPos);
193
}
194
195
// See if the line begins with the ending of an in-progress multi-line element.
196
// If so, return the ending.
197
std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
198
size_t EndPos = Line.find("}}}");
199
if (EndPos == StringRef::npos)
200
return std::nullopt;
201
return Line.take_front(EndPos + 3);
202
}
203
204
} // end namespace symbolize
205
} // end namespace llvm
206
207