Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
35266 views
1
//===-- HTMLLogger.cpp ----------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the HTML logger. Given a directory dir/, we write
10
// dir/0.html for the first analysis, etc.
11
// These files contain a visualization that allows inspecting the CFG and the
12
// state of the analysis at each point.
13
// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14
// so each output file is self-contained.
15
//
16
// VIEWS
17
//
18
// The timeline and function view are always shown. These allow selecting basic
19
// blocks, statements within them, and processing iterations (BBs are visited
20
// multiple times when e.g. loops are involved).
21
// These are written directly into the HTML body.
22
//
23
// There are also listings of particular basic blocks, and dumps of the state
24
// at particular analysis points (i.e. BB2 iteration 3 statement 2).
25
// These are only shown when the relevant BB/analysis point is *selected*.
26
//
27
// DATA AND TEMPLATES
28
//
29
// The HTML proper is mostly static.
30
// The analysis data is in a JSON object HTMLLoggerData which is embedded as
31
// a <script> in the <head>.
32
// This gets rendered into DOM by a simple template processor which substitutes
33
// the data into <template> tags embedded in the HTML. (see inflate() in JS).
34
//
35
// SELECTION
36
//
37
// This is the only real interactive mechanism.
38
//
39
// At any given time, there are several named selections, e.g.:
40
// bb: B2 (basic block 0 is selected)
41
// elt: B2.4 (statement 4 is selected)
42
// iter: B2:1 (iteration 1 of the basic block is selected)
43
// hover: B3 (hovering over basic block 3)
44
//
45
// The selection is updated by mouse events: hover by moving the mouse and
46
// others by clicking. Elements that are click targets generally have attributes
47
// (id or data-foo) that define what they should select.
48
// See watchSelection() in JS for the exact logic.
49
//
50
// When the "bb" selection is set to "B2":
51
// - sections <section data-selection="bb"> get shown
52
// - templates under such sections get re-rendered
53
// - elements with class/id "B2" get class "bb-select"
54
//
55
//===----------------------------------------------------------------------===//
56
57
#include "clang/Analysis/FlowSensitive/AdornedCFG.h"
58
#include "clang/Analysis/FlowSensitive/DebugSupport.h"
59
#include "clang/Analysis/FlowSensitive/Logger.h"
60
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
61
#include "clang/Analysis/FlowSensitive/Value.h"
62
#include "clang/Basic/SourceManager.h"
63
#include "clang/Lex/Lexer.h"
64
#include "llvm/ADT/DenseMap.h"
65
#include "llvm/ADT/ScopeExit.h"
66
#include "llvm/Support/Error.h"
67
#include "llvm/Support/FormatVariadic.h"
68
#include "llvm/Support/JSON.h"
69
#include "llvm/Support/Program.h"
70
#include "llvm/Support/ScopedPrinter.h"
71
#include "llvm/Support/raw_ostream.h"
72
// Defines assets: HTMLLogger_{html_js,css}
73
#include "HTMLLogger.inc"
74
75
namespace clang::dataflow {
76
namespace {
77
78
// Render a graphviz graph specification to SVG using the `dot` tool.
79
llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
80
81
using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
82
83
// Recursively dumps Values/StorageLocations as JSON
84
class ModelDumper {
85
public:
86
ModelDumper(llvm::json::OStream &JOS, const Environment &Env)
87
: JOS(JOS), Env(Env) {}
88
89
void dump(Value &V) {
90
JOS.attribute("value_id", llvm::to_string(&V));
91
if (!Visited.insert(&V).second)
92
return;
93
94
JOS.attribute("kind", debugString(V.getKind()));
95
96
switch (V.getKind()) {
97
case Value::Kind::Integer:
98
case Value::Kind::TopBool:
99
case Value::Kind::AtomicBool:
100
case Value::Kind::FormulaBool:
101
break;
102
case Value::Kind::Pointer:
103
JOS.attributeObject(
104
"pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });
105
break;
106
}
107
108
for (const auto& Prop : V.properties())
109
JOS.attributeObject(("p:" + Prop.first()).str(),
110
[&] { dump(*Prop.second); });
111
112
// Running the SAT solver is expensive, but knowing which booleans are
113
// guaranteed true/false here is valuable and hard to determine by hand.
114
if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
115
JOS.attribute("formula", llvm::to_string(B->formula()));
116
JOS.attribute("truth", Env.proves(B->formula()) ? "true"
117
: Env.proves(Env.arena().makeNot(B->formula()))
118
? "false"
119
: "unknown");
120
}
121
}
122
void dump(const StorageLocation &L) {
123
JOS.attribute("location", llvm::to_string(&L));
124
if (!Visited.insert(&L).second)
125
return;
126
127
JOS.attribute("type", L.getType().getAsString());
128
if (!L.getType()->isRecordType())
129
if (auto *V = Env.getValue(L))
130
dump(*V);
131
132
if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) {
133
for (const auto &Child : RLoc->children())
134
JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {
135
if (Child.second)
136
if (Value *Val = Env.getValue(*Child.second))
137
dump(*Val);
138
});
139
140
for (const auto &SyntheticField : RLoc->synthetic_fields())
141
JOS.attributeObject(("sf:" + SyntheticField.first()).str(),
142
[&] { dump(*SyntheticField.second); });
143
}
144
}
145
146
llvm::DenseSet<const void*> Visited;
147
llvm::json::OStream &JOS;
148
const Environment &Env;
149
};
150
151
class HTMLLogger : public Logger {
152
struct Iteration {
153
const CFGBlock *Block;
154
unsigned Iter;
155
bool PostVisit;
156
bool Converged;
157
};
158
159
StreamFactory Streams;
160
std::unique_ptr<llvm::raw_ostream> OS;
161
std::string JSON;
162
llvm::raw_string_ostream JStringStream{JSON};
163
llvm::json::OStream JOS{JStringStream, /*Indent=*/2};
164
165
const AdornedCFG *ACFG;
166
// Timeline of iterations of CFG block visitation.
167
std::vector<Iteration> Iters;
168
// Indexes in `Iters` of the iterations for each block.
169
llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters;
170
// For a given block ID, did the block converge (on the last iteration)?
171
llvm::BitVector BlockConverged;
172
// The messages logged in the current context but not yet written.
173
std::string ContextLogs;
174
// The number of elements we have visited within the current CFG block.
175
unsigned ElementIndex;
176
177
public:
178
explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
179
void beginAnalysis(const AdornedCFG &ACFG,
180
TypeErasedDataflowAnalysis &A) override {
181
OS = Streams();
182
this->ACFG = &ACFG;
183
*OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
184
185
BlockConverged.resize(ACFG.getCFG().getNumBlockIDs());
186
187
const auto &D = ACFG.getDecl();
188
const auto &SM = A.getASTContext().getSourceManager();
189
*OS << "<title>";
190
if (const auto *ND = dyn_cast<NamedDecl>(&D))
191
*OS << ND->getNameAsString() << " at ";
192
*OS << SM.getFilename(D.getLocation()) << ":"
193
<< SM.getSpellingLineNumber(D.getLocation());
194
*OS << "</title>\n";
195
196
*OS << "<style>" << HTMLLogger_css << "</style>\n";
197
*OS << "<script>" << HTMLLogger_js << "</script>\n";
198
199
writeCode();
200
JOS.objectBegin();
201
JOS.attributeBegin("states");
202
JOS.objectBegin();
203
}
204
// Between beginAnalysis() and endAnalysis() we write all the states for
205
// particular analysis points into the `timeline` array.
206
void endAnalysis() override {
207
JOS.objectEnd();
208
JOS.attributeEnd();
209
210
JOS.attributeArray("timeline", [&] {
211
for (const auto &E : Iters) {
212
JOS.object([&] {
213
JOS.attribute("block", blockID(E.Block->getBlockID()));
214
JOS.attribute("iter", E.Iter);
215
JOS.attribute("post_visit", E.PostVisit);
216
JOS.attribute("converged", E.Converged);
217
});
218
}
219
});
220
JOS.attributeObject("cfg", [&] {
221
for (const auto &E : BlockIters)
222
writeBlock(*E.first, E.second);
223
});
224
225
JOS.objectEnd();
226
227
writeCFG();
228
229
*OS << "<script>var HTMLLoggerData = \n";
230
*OS << JSON;
231
*OS << ";\n</script>\n";
232
*OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
233
}
234
235
void enterBlock(const CFGBlock &B, bool PostVisit) override {
236
llvm::SmallVector<size_t> &BIter = BlockIters[&B];
237
unsigned IterNum = BIter.size() + 1;
238
BIter.push_back(Iters.size());
239
Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false});
240
if (!PostVisit)
241
BlockConverged[B.getBlockID()] = false;
242
ElementIndex = 0;
243
}
244
void enterElement(const CFGElement &E) override {
245
++ElementIndex;
246
}
247
248
static std::string blockID(unsigned Block) {
249
return llvm::formatv("B{0}", Block);
250
}
251
static std::string eltID(unsigned Block, unsigned Element) {
252
return llvm::formatv("B{0}.{1}", Block, Element);
253
}
254
static std::string iterID(unsigned Block, unsigned Iter) {
255
return llvm::formatv("B{0}:{1}", Block, Iter);
256
}
257
static std::string elementIterID(unsigned Block, unsigned Iter,
258
unsigned Element) {
259
return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
260
}
261
262
// Write the analysis state associated with a particular analysis point.
263
// FIXME: this dump is fairly opaque. We should show:
264
// - values associated with the current Stmt
265
// - values associated with its children
266
// - meaningful names for values
267
// - which boolean values are implied true/false by the flow condition
268
void recordState(TypeErasedDataflowAnalysisState &State) override {
269
unsigned Block = Iters.back().Block->getBlockID();
270
unsigned Iter = Iters.back().Iter;
271
bool PostVisit = Iters.back().PostVisit;
272
JOS.attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {
273
JOS.attribute("block", blockID(Block));
274
JOS.attribute("iter", Iter);
275
JOS.attribute("post_visit", PostVisit);
276
JOS.attribute("element", ElementIndex);
277
278
// If this state immediately follows an Expr, show its built-in model.
279
if (ElementIndex > 0) {
280
auto S =
281
Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>();
282
if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) {
283
if (E->isPRValue()) {
284
if (!E->getType()->isRecordType())
285
if (auto *V = State.Env.getValue(*E))
286
JOS.attributeObject(
287
"value", [&] { ModelDumper(JOS, State.Env).dump(*V); });
288
} else {
289
if (auto *Loc = State.Env.getStorageLocation(*E))
290
JOS.attributeObject(
291
"value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); });
292
}
293
}
294
}
295
if (!ContextLogs.empty()) {
296
JOS.attribute("logs", ContextLogs);
297
ContextLogs.clear();
298
}
299
{
300
std::string BuiltinLattice;
301
llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
302
State.Env.dump(BuiltinLatticeS);
303
JOS.attribute("builtinLattice", BuiltinLattice);
304
}
305
});
306
}
307
void blockConverged() override {
308
Iters.back().Converged = true;
309
BlockConverged[Iters.back().Block->getBlockID()] = true;
310
}
311
312
void logText(llvm::StringRef S) override {
313
ContextLogs.append(S.begin(), S.end());
314
ContextLogs.push_back('\n');
315
}
316
317
private:
318
// Write the CFG block details.
319
// Currently this is just the list of elements in execution order.
320
// FIXME: an AST dump would be a useful view, too.
321
void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) {
322
JOS.attributeObject(blockID(B.getBlockID()), [&] {
323
JOS.attributeArray("iters", [&] {
324
for (size_t IterIdx : ItersForB) {
325
const Iteration &Iter = Iters[IterIdx];
326
JOS.object([&] {
327
JOS.attribute("iter", Iter.Iter);
328
JOS.attribute("post_visit", Iter.PostVisit);
329
JOS.attribute("converged", Iter.Converged);
330
});
331
}
332
});
333
JOS.attributeArray("elements", [&] {
334
for (const auto &Elt : B.Elements) {
335
std::string Dump;
336
llvm::raw_string_ostream DumpS(Dump);
337
Elt.dumpToStream(DumpS);
338
JOS.value(Dump);
339
}
340
});
341
});
342
}
343
344
// Write the code of function being examined.
345
// We want to overlay the code with <span>s that mark which BB particular
346
// tokens are associated with, and even which BB element (so that clicking
347
// can select the right element).
348
void writeCode() {
349
const auto &AST = ACFG->getDecl().getASTContext();
350
bool Invalid = false;
351
352
// Extract the source code from the original file.
353
// Pretty-printing from the AST would probably be nicer (no macros or
354
// indentation to worry about), but we need the boundaries of particular
355
// AST nodes and the printer doesn't provide this.
356
auto Range = clang::Lexer::makeFileCharRange(
357
CharSourceRange::getTokenRange(ACFG->getDecl().getSourceRange()),
358
AST.getSourceManager(), AST.getLangOpts());
359
if (Range.isInvalid())
360
return;
361
llvm::StringRef Code = clang::Lexer::getSourceText(
362
Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
363
if (Invalid)
364
return;
365
366
// TokenInfo stores the BB and set of elements that a token is part of.
367
struct TokenInfo {
368
enum : unsigned { Missing = static_cast<unsigned>(-1) };
369
370
// The basic block this is part of.
371
// This is the BB of the stmt with the smallest containing range.
372
unsigned BB = Missing;
373
unsigned BBPriority = 0;
374
// The most specific stmt this is part of (smallest range).
375
unsigned Elt = Missing;
376
unsigned EltPriority = 0;
377
// All stmts this is part of.
378
SmallVector<unsigned> Elts;
379
380
// Mark this token as being part of BB.Elt.
381
// RangeLen is the character length of the element's range, used to
382
// distinguish inner vs outer statements.
383
// For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
384
// However "a" has a smaller range, so is more specific. Clicking on the
385
// token "a" should select the stmt "a".
386
void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
387
// A worse BB (larger range) => ignore.
388
if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
389
return;
390
if (BB != this->BB) {
391
this->BB = BB;
392
Elts.clear();
393
BBPriority = RangeLen;
394
}
395
BBPriority = std::min(BBPriority, RangeLen);
396
Elts.push_back(Elt);
397
if (this->Elt == Missing || EltPriority > RangeLen)
398
this->Elt = Elt;
399
}
400
bool operator==(const TokenInfo &Other) const {
401
return std::tie(BB, Elt, Elts) ==
402
std::tie(Other.BB, Other.Elt, Other.Elts);
403
}
404
// Write the attributes for the <span> on this token.
405
void write(llvm::raw_ostream &OS) const {
406
OS << "class='c";
407
if (BB != Missing)
408
OS << " " << blockID(BB);
409
for (unsigned Elt : Elts)
410
OS << " " << eltID(BB, Elt);
411
OS << "'";
412
413
if (Elt != Missing)
414
OS << " data-elt='" << eltID(BB, Elt) << "'";
415
if (BB != Missing)
416
OS << " data-bb='" << blockID(BB) << "'";
417
}
418
};
419
420
// Construct one TokenInfo per character in a flat array.
421
// This is inefficient (chars in a token all have the same info) but simple.
422
std::vector<TokenInfo> State(Code.size());
423
for (const auto *Block : ACFG->getCFG()) {
424
unsigned EltIndex = 0;
425
for (const auto& Elt : *Block) {
426
++EltIndex;
427
if (const auto S = Elt.getAs<CFGStmt>()) {
428
auto EltRange = clang::Lexer::makeFileCharRange(
429
CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
430
AST.getSourceManager(), AST.getLangOpts());
431
if (EltRange.isInvalid())
432
continue;
433
if (EltRange.getBegin() < Range.getBegin() ||
434
EltRange.getEnd() >= Range.getEnd() ||
435
EltRange.getEnd() < Range.getBegin() ||
436
EltRange.getEnd() >= Range.getEnd())
437
continue;
438
unsigned Off = EltRange.getBegin().getRawEncoding() -
439
Range.getBegin().getRawEncoding();
440
unsigned Len = EltRange.getEnd().getRawEncoding() -
441
EltRange.getBegin().getRawEncoding();
442
for (unsigned I = 0; I < Len; ++I)
443
State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
444
}
445
}
446
}
447
448
// Finally, write the code with the correct <span>s.
449
unsigned Line =
450
AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
451
*OS << "<template data-copy='code'>\n";
452
*OS << "<code class='filename'>";
453
llvm::printHTMLEscaped(
454
llvm::sys::path::filename(
455
AST.getSourceManager().getFilename(Range.getBegin())),
456
*OS);
457
*OS << "</code>";
458
*OS << "<code class='line' data-line='" << Line++ << "'>";
459
for (unsigned I = 0; I < Code.size(); ++I) {
460
// Don't actually write a <span> around each character, only break spans
461
// when the TokenInfo changes.
462
bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
463
bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
464
if (NeedOpen) {
465
*OS << "<span ";
466
State[I].write(*OS);
467
*OS << ">";
468
}
469
if (Code[I] == '\n')
470
*OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
471
else
472
llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
473
if (NeedClose) *OS << "</span>";
474
}
475
*OS << "</code>\n";
476
*OS << "</template>";
477
}
478
479
// Write the CFG diagram, a graph of basic blocks.
480
// Laying out graphs is hard, so we construct a graphviz description and shell
481
// out to `dot` to turn it into an SVG.
482
void writeCFG() {
483
*OS << "<template data-copy='cfg'>\n";
484
if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG())))
485
*OS << *SVG;
486
else
487
*OS << "Can't draw CFG: " << toString(SVG.takeError());
488
*OS << "</template>\n";
489
}
490
491
// Produce a graphviz description of a CFG.
492
std::string buildCFGDot(const clang::CFG &CFG) {
493
std::string Graph;
494
llvm::raw_string_ostream GraphS(Graph);
495
// Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
496
GraphS << R"(digraph {
497
tooltip=" "
498
node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
499
edge[tooltip = " "]
500
)";
501
for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) {
502
std::string Name = blockID(I);
503
// Rightwards arrow, vertical line
504
const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c";
505
if (BlockConverged[I])
506
Name += ConvergenceMarker;
507
GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\""
508
<< Name << "\"]\n";
509
}
510
for (const auto *Block : CFG) {
511
for (const auto &Succ : Block->succs()) {
512
if (Succ.getReachableBlock())
513
GraphS << " " << blockID(Block->getBlockID()) << " -> "
514
<< blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
515
}
516
}
517
GraphS << "}\n";
518
return Graph;
519
}
520
};
521
522
// Nothing interesting here, just subprocess/temp-file plumbing.
523
llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
524
std::string DotPath;
525
if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT"))
526
DotPath = FromEnv;
527
else {
528
auto FromPath = llvm::sys::findProgramByName("dot");
529
if (!FromPath)
530
return llvm::createStringError(FromPath.getError(),
531
"'dot' not found on PATH");
532
DotPath = FromPath.get();
533
}
534
535
// Create input and output files for `dot` subprocess.
536
// (We create the output file as empty, to reserve the temp filename).
537
llvm::SmallString<256> Input, Output;
538
int InputFD;
539
if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
540
Input))
541
return llvm::createStringError(EC, "failed to create `dot` temp input");
542
llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
543
auto DeleteInput =
544
llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
545
if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
546
return llvm::createStringError(EC, "failed to create `dot` temp output");
547
auto DeleteOutput =
548
llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
549
550
std::vector<std::optional<llvm::StringRef>> Redirects = {
551
Input, Output,
552
/*stderr=*/std::nullopt};
553
std::string ErrMsg;
554
int Code = llvm::sys::ExecuteAndWait(
555
DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
556
/*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
557
if (!ErrMsg.empty())
558
return llvm::createStringError(llvm::inconvertibleErrorCode(),
559
"'dot' failed: " + ErrMsg);
560
if (Code != 0)
561
return llvm::createStringError(llvm::inconvertibleErrorCode(),
562
"'dot' failed (" + llvm::Twine(Code) + ")");
563
564
auto Buf = llvm::MemoryBuffer::getFile(Output);
565
if (!Buf)
566
return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
567
568
// Output has <?xml> prefix we don't want. Skip to <svg> tag.
569
llvm::StringRef Result = Buf.get()->getBuffer();
570
auto Pos = Result.find("<svg");
571
if (Pos == llvm::StringRef::npos)
572
return llvm::createStringError(llvm::inconvertibleErrorCode(),
573
"Can't find <svg> tag in `dot` output");
574
return Result.substr(Pos).str();
575
}
576
577
} // namespace
578
579
std::unique_ptr<Logger>
580
Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
581
return std::make_unique<HTMLLogger>(std::move(Streams));
582
}
583
584
} // namespace clang::dataflow
585
586