Path: blob/main/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
35266 views
//===-- HTMLLogger.cpp ----------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements the HTML logger. Given a directory dir/, we write9// dir/0.html for the first analysis, etc.10// These files contain a visualization that allows inspecting the CFG and the11// state of the analysis at each point.12// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded13// so each output file is self-contained.14//15// VIEWS16//17// The timeline and function view are always shown. These allow selecting basic18// blocks, statements within them, and processing iterations (BBs are visited19// multiple times when e.g. loops are involved).20// These are written directly into the HTML body.21//22// There are also listings of particular basic blocks, and dumps of the state23// at particular analysis points (i.e. BB2 iteration 3 statement 2).24// These are only shown when the relevant BB/analysis point is *selected*.25//26// DATA AND TEMPLATES27//28// The HTML proper is mostly static.29// The analysis data is in a JSON object HTMLLoggerData which is embedded as30// a <script> in the <head>.31// This gets rendered into DOM by a simple template processor which substitutes32// the data into <template> tags embedded in the HTML. (see inflate() in JS).33//34// SELECTION35//36// This is the only real interactive mechanism.37//38// At any given time, there are several named selections, e.g.:39// bb: B2 (basic block 0 is selected)40// elt: B2.4 (statement 4 is selected)41// iter: B2:1 (iteration 1 of the basic block is selected)42// hover: B3 (hovering over basic block 3)43//44// The selection is updated by mouse events: hover by moving the mouse and45// others by clicking. Elements that are click targets generally have attributes46// (id or data-foo) that define what they should select.47// See watchSelection() in JS for the exact logic.48//49// When the "bb" selection is set to "B2":50// - sections <section data-selection="bb"> get shown51// - templates under such sections get re-rendered52// - elements with class/id "B2" get class "bb-select"53//54//===----------------------------------------------------------------------===//5556#include "clang/Analysis/FlowSensitive/AdornedCFG.h"57#include "clang/Analysis/FlowSensitive/DebugSupport.h"58#include "clang/Analysis/FlowSensitive/Logger.h"59#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"60#include "clang/Analysis/FlowSensitive/Value.h"61#include "clang/Basic/SourceManager.h"62#include "clang/Lex/Lexer.h"63#include "llvm/ADT/DenseMap.h"64#include "llvm/ADT/ScopeExit.h"65#include "llvm/Support/Error.h"66#include "llvm/Support/FormatVariadic.h"67#include "llvm/Support/JSON.h"68#include "llvm/Support/Program.h"69#include "llvm/Support/ScopedPrinter.h"70#include "llvm/Support/raw_ostream.h"71// Defines assets: HTMLLogger_{html_js,css}72#include "HTMLLogger.inc"7374namespace clang::dataflow {75namespace {7677// Render a graphviz graph specification to SVG using the `dot` tool.78llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);7980using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;8182// Recursively dumps Values/StorageLocations as JSON83class ModelDumper {84public:85ModelDumper(llvm::json::OStream &JOS, const Environment &Env)86: JOS(JOS), Env(Env) {}8788void dump(Value &V) {89JOS.attribute("value_id", llvm::to_string(&V));90if (!Visited.insert(&V).second)91return;9293JOS.attribute("kind", debugString(V.getKind()));9495switch (V.getKind()) {96case Value::Kind::Integer:97case Value::Kind::TopBool:98case Value::Kind::AtomicBool:99case Value::Kind::FormulaBool:100break;101case Value::Kind::Pointer:102JOS.attributeObject(103"pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });104break;105}106107for (const auto& Prop : V.properties())108JOS.attributeObject(("p:" + Prop.first()).str(),109[&] { dump(*Prop.second); });110111// Running the SAT solver is expensive, but knowing which booleans are112// guaranteed true/false here is valuable and hard to determine by hand.113if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {114JOS.attribute("formula", llvm::to_string(B->formula()));115JOS.attribute("truth", Env.proves(B->formula()) ? "true"116: Env.proves(Env.arena().makeNot(B->formula()))117? "false"118: "unknown");119}120}121void dump(const StorageLocation &L) {122JOS.attribute("location", llvm::to_string(&L));123if (!Visited.insert(&L).second)124return;125126JOS.attribute("type", L.getType().getAsString());127if (!L.getType()->isRecordType())128if (auto *V = Env.getValue(L))129dump(*V);130131if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) {132for (const auto &Child : RLoc->children())133JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {134if (Child.second)135if (Value *Val = Env.getValue(*Child.second))136dump(*Val);137});138139for (const auto &SyntheticField : RLoc->synthetic_fields())140JOS.attributeObject(("sf:" + SyntheticField.first()).str(),141[&] { dump(*SyntheticField.second); });142}143}144145llvm::DenseSet<const void*> Visited;146llvm::json::OStream &JOS;147const Environment &Env;148};149150class HTMLLogger : public Logger {151struct Iteration {152const CFGBlock *Block;153unsigned Iter;154bool PostVisit;155bool Converged;156};157158StreamFactory Streams;159std::unique_ptr<llvm::raw_ostream> OS;160std::string JSON;161llvm::raw_string_ostream JStringStream{JSON};162llvm::json::OStream JOS{JStringStream, /*Indent=*/2};163164const AdornedCFG *ACFG;165// Timeline of iterations of CFG block visitation.166std::vector<Iteration> Iters;167// Indexes in `Iters` of the iterations for each block.168llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters;169// For a given block ID, did the block converge (on the last iteration)?170llvm::BitVector BlockConverged;171// The messages logged in the current context but not yet written.172std::string ContextLogs;173// The number of elements we have visited within the current CFG block.174unsigned ElementIndex;175176public:177explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}178void beginAnalysis(const AdornedCFG &ACFG,179TypeErasedDataflowAnalysis &A) override {180OS = Streams();181this->ACFG = &ACFG;182*OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;183184BlockConverged.resize(ACFG.getCFG().getNumBlockIDs());185186const auto &D = ACFG.getDecl();187const auto &SM = A.getASTContext().getSourceManager();188*OS << "<title>";189if (const auto *ND = dyn_cast<NamedDecl>(&D))190*OS << ND->getNameAsString() << " at ";191*OS << SM.getFilename(D.getLocation()) << ":"192<< SM.getSpellingLineNumber(D.getLocation());193*OS << "</title>\n";194195*OS << "<style>" << HTMLLogger_css << "</style>\n";196*OS << "<script>" << HTMLLogger_js << "</script>\n";197198writeCode();199JOS.objectBegin();200JOS.attributeBegin("states");201JOS.objectBegin();202}203// Between beginAnalysis() and endAnalysis() we write all the states for204// particular analysis points into the `timeline` array.205void endAnalysis() override {206JOS.objectEnd();207JOS.attributeEnd();208209JOS.attributeArray("timeline", [&] {210for (const auto &E : Iters) {211JOS.object([&] {212JOS.attribute("block", blockID(E.Block->getBlockID()));213JOS.attribute("iter", E.Iter);214JOS.attribute("post_visit", E.PostVisit);215JOS.attribute("converged", E.Converged);216});217}218});219JOS.attributeObject("cfg", [&] {220for (const auto &E : BlockIters)221writeBlock(*E.first, E.second);222});223224JOS.objectEnd();225226writeCFG();227228*OS << "<script>var HTMLLoggerData = \n";229*OS << JSON;230*OS << ";\n</script>\n";231*OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;232}233234void enterBlock(const CFGBlock &B, bool PostVisit) override {235llvm::SmallVector<size_t> &BIter = BlockIters[&B];236unsigned IterNum = BIter.size() + 1;237BIter.push_back(Iters.size());238Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false});239if (!PostVisit)240BlockConverged[B.getBlockID()] = false;241ElementIndex = 0;242}243void enterElement(const CFGElement &E) override {244++ElementIndex;245}246247static std::string blockID(unsigned Block) {248return llvm::formatv("B{0}", Block);249}250static std::string eltID(unsigned Block, unsigned Element) {251return llvm::formatv("B{0}.{1}", Block, Element);252}253static std::string iterID(unsigned Block, unsigned Iter) {254return llvm::formatv("B{0}:{1}", Block, Iter);255}256static std::string elementIterID(unsigned Block, unsigned Iter,257unsigned Element) {258return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);259}260261// Write the analysis state associated with a particular analysis point.262// FIXME: this dump is fairly opaque. We should show:263// - values associated with the current Stmt264// - values associated with its children265// - meaningful names for values266// - which boolean values are implied true/false by the flow condition267void recordState(TypeErasedDataflowAnalysisState &State) override {268unsigned Block = Iters.back().Block->getBlockID();269unsigned Iter = Iters.back().Iter;270bool PostVisit = Iters.back().PostVisit;271JOS.attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {272JOS.attribute("block", blockID(Block));273JOS.attribute("iter", Iter);274JOS.attribute("post_visit", PostVisit);275JOS.attribute("element", ElementIndex);276277// If this state immediately follows an Expr, show its built-in model.278if (ElementIndex > 0) {279auto S =280Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>();281if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) {282if (E->isPRValue()) {283if (!E->getType()->isRecordType())284if (auto *V = State.Env.getValue(*E))285JOS.attributeObject(286"value", [&] { ModelDumper(JOS, State.Env).dump(*V); });287} else {288if (auto *Loc = State.Env.getStorageLocation(*E))289JOS.attributeObject(290"value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); });291}292}293}294if (!ContextLogs.empty()) {295JOS.attribute("logs", ContextLogs);296ContextLogs.clear();297}298{299std::string BuiltinLattice;300llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);301State.Env.dump(BuiltinLatticeS);302JOS.attribute("builtinLattice", BuiltinLattice);303}304});305}306void blockConverged() override {307Iters.back().Converged = true;308BlockConverged[Iters.back().Block->getBlockID()] = true;309}310311void logText(llvm::StringRef S) override {312ContextLogs.append(S.begin(), S.end());313ContextLogs.push_back('\n');314}315316private:317// Write the CFG block details.318// Currently this is just the list of elements in execution order.319// FIXME: an AST dump would be a useful view, too.320void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) {321JOS.attributeObject(blockID(B.getBlockID()), [&] {322JOS.attributeArray("iters", [&] {323for (size_t IterIdx : ItersForB) {324const Iteration &Iter = Iters[IterIdx];325JOS.object([&] {326JOS.attribute("iter", Iter.Iter);327JOS.attribute("post_visit", Iter.PostVisit);328JOS.attribute("converged", Iter.Converged);329});330}331});332JOS.attributeArray("elements", [&] {333for (const auto &Elt : B.Elements) {334std::string Dump;335llvm::raw_string_ostream DumpS(Dump);336Elt.dumpToStream(DumpS);337JOS.value(Dump);338}339});340});341}342343// Write the code of function being examined.344// We want to overlay the code with <span>s that mark which BB particular345// tokens are associated with, and even which BB element (so that clicking346// can select the right element).347void writeCode() {348const auto &AST = ACFG->getDecl().getASTContext();349bool Invalid = false;350351// Extract the source code from the original file.352// Pretty-printing from the AST would probably be nicer (no macros or353// indentation to worry about), but we need the boundaries of particular354// AST nodes and the printer doesn't provide this.355auto Range = clang::Lexer::makeFileCharRange(356CharSourceRange::getTokenRange(ACFG->getDecl().getSourceRange()),357AST.getSourceManager(), AST.getLangOpts());358if (Range.isInvalid())359return;360llvm::StringRef Code = clang::Lexer::getSourceText(361Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);362if (Invalid)363return;364365// TokenInfo stores the BB and set of elements that a token is part of.366struct TokenInfo {367enum : unsigned { Missing = static_cast<unsigned>(-1) };368369// The basic block this is part of.370// This is the BB of the stmt with the smallest containing range.371unsigned BB = Missing;372unsigned BBPriority = 0;373// The most specific stmt this is part of (smallest range).374unsigned Elt = Missing;375unsigned EltPriority = 0;376// All stmts this is part of.377SmallVector<unsigned> Elts;378379// Mark this token as being part of BB.Elt.380// RangeLen is the character length of the element's range, used to381// distinguish inner vs outer statements.382// For example in `a==0`, token "a" is part of the stmts "a" and "a==0".383// However "a" has a smaller range, so is more specific. Clicking on the384// token "a" should select the stmt "a".385void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {386// A worse BB (larger range) => ignore.387if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)388return;389if (BB != this->BB) {390this->BB = BB;391Elts.clear();392BBPriority = RangeLen;393}394BBPriority = std::min(BBPriority, RangeLen);395Elts.push_back(Elt);396if (this->Elt == Missing || EltPriority > RangeLen)397this->Elt = Elt;398}399bool operator==(const TokenInfo &Other) const {400return std::tie(BB, Elt, Elts) ==401std::tie(Other.BB, Other.Elt, Other.Elts);402}403// Write the attributes for the <span> on this token.404void write(llvm::raw_ostream &OS) const {405OS << "class='c";406if (BB != Missing)407OS << " " << blockID(BB);408for (unsigned Elt : Elts)409OS << " " << eltID(BB, Elt);410OS << "'";411412if (Elt != Missing)413OS << " data-elt='" << eltID(BB, Elt) << "'";414if (BB != Missing)415OS << " data-bb='" << blockID(BB) << "'";416}417};418419// Construct one TokenInfo per character in a flat array.420// This is inefficient (chars in a token all have the same info) but simple.421std::vector<TokenInfo> State(Code.size());422for (const auto *Block : ACFG->getCFG()) {423unsigned EltIndex = 0;424for (const auto& Elt : *Block) {425++EltIndex;426if (const auto S = Elt.getAs<CFGStmt>()) {427auto EltRange = clang::Lexer::makeFileCharRange(428CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),429AST.getSourceManager(), AST.getLangOpts());430if (EltRange.isInvalid())431continue;432if (EltRange.getBegin() < Range.getBegin() ||433EltRange.getEnd() >= Range.getEnd() ||434EltRange.getEnd() < Range.getBegin() ||435EltRange.getEnd() >= Range.getEnd())436continue;437unsigned Off = EltRange.getBegin().getRawEncoding() -438Range.getBegin().getRawEncoding();439unsigned Len = EltRange.getEnd().getRawEncoding() -440EltRange.getBegin().getRawEncoding();441for (unsigned I = 0; I < Len; ++I)442State[Off + I].assign(Block->getBlockID(), EltIndex, Len);443}444}445}446447// Finally, write the code with the correct <span>s.448unsigned Line =449AST.getSourceManager().getSpellingLineNumber(Range.getBegin());450*OS << "<template data-copy='code'>\n";451*OS << "<code class='filename'>";452llvm::printHTMLEscaped(453llvm::sys::path::filename(454AST.getSourceManager().getFilename(Range.getBegin())),455*OS);456*OS << "</code>";457*OS << "<code class='line' data-line='" << Line++ << "'>";458for (unsigned I = 0; I < Code.size(); ++I) {459// Don't actually write a <span> around each character, only break spans460// when the TokenInfo changes.461bool NeedOpen = I == 0 || !(State[I] == State[I-1]);462bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);463if (NeedOpen) {464*OS << "<span ";465State[I].write(*OS);466*OS << ">";467}468if (Code[I] == '\n')469*OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";470else471llvm::printHTMLEscaped(Code.substr(I, 1), *OS);472if (NeedClose) *OS << "</span>";473}474*OS << "</code>\n";475*OS << "</template>";476}477478// Write the CFG diagram, a graph of basic blocks.479// Laying out graphs is hard, so we construct a graphviz description and shell480// out to `dot` to turn it into an SVG.481void writeCFG() {482*OS << "<template data-copy='cfg'>\n";483if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG())))484*OS << *SVG;485else486*OS << "Can't draw CFG: " << toString(SVG.takeError());487*OS << "</template>\n";488}489490// Produce a graphviz description of a CFG.491std::string buildCFGDot(const clang::CFG &CFG) {492std::string Graph;493llvm::raw_string_ostream GraphS(Graph);494// Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.495GraphS << R"(digraph {496tooltip=" "497node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]498edge[tooltip = " "]499)";500for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) {501std::string Name = blockID(I);502// Rightwards arrow, vertical line503const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c";504if (BlockConverged[I])505Name += ConvergenceMarker;506GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\""507<< Name << "\"]\n";508}509for (const auto *Block : CFG) {510for (const auto &Succ : Block->succs()) {511if (Succ.getReachableBlock())512GraphS << " " << blockID(Block->getBlockID()) << " -> "513<< blockID(Succ.getReachableBlock()->getBlockID()) << "\n";514}515}516GraphS << "}\n";517return Graph;518}519};520521// Nothing interesting here, just subprocess/temp-file plumbing.522llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {523std::string DotPath;524if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT"))525DotPath = FromEnv;526else {527auto FromPath = llvm::sys::findProgramByName("dot");528if (!FromPath)529return llvm::createStringError(FromPath.getError(),530"'dot' not found on PATH");531DotPath = FromPath.get();532}533534// Create input and output files for `dot` subprocess.535// (We create the output file as empty, to reserve the temp filename).536llvm::SmallString<256> Input, Output;537int InputFD;538if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,539Input))540return llvm::createStringError(EC, "failed to create `dot` temp input");541llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;542auto DeleteInput =543llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });544if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))545return llvm::createStringError(EC, "failed to create `dot` temp output");546auto DeleteOutput =547llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });548549std::vector<std::optional<llvm::StringRef>> Redirects = {550Input, Output,551/*stderr=*/std::nullopt};552std::string ErrMsg;553int Code = llvm::sys::ExecuteAndWait(554DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,555/*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);556if (!ErrMsg.empty())557return llvm::createStringError(llvm::inconvertibleErrorCode(),558"'dot' failed: " + ErrMsg);559if (Code != 0)560return llvm::createStringError(llvm::inconvertibleErrorCode(),561"'dot' failed (" + llvm::Twine(Code) + ")");562563auto Buf = llvm::MemoryBuffer::getFile(Output);564if (!Buf)565return llvm::createStringError(Buf.getError(), "Can't read `dot` output");566567// Output has <?xml> prefix we don't want. Skip to <svg> tag.568llvm::StringRef Result = Buf.get()->getBuffer();569auto Pos = Result.find("<svg");570if (Pos == llvm::StringRef::npos)571return llvm::createStringError(llvm::inconvertibleErrorCode(),572"Can't find <svg> tag in `dot` output");573return Result.substr(Pos).str();574}575576} // namespace577578std::unique_ptr<Logger>579Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {580return std::make_unique<HTMLLogger>(std::move(Streams));581}582583} // namespace clang::dataflow584585586