Path: blob/main/contrib/llvm-project/clang/lib/Basic/Sarif.cpp
35234 views
//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// This file contains the declaration of the SARIFDocumentWriter class, and10/// associated builders such as:11/// - \ref SarifArtifact12/// - \ref SarifArtifactLocation13/// - \ref SarifRule14/// - \ref SarifResult15//===----------------------------------------------------------------------===//16#include "clang/Basic/Sarif.h"17#include "clang/Basic/SourceLocation.h"18#include "clang/Basic/SourceManager.h"19#include "llvm/ADT/ArrayRef.h"20#include "llvm/ADT/STLExtras.h"21#include "llvm/ADT/StringExtras.h"22#include "llvm/ADT/StringRef.h"23#include "llvm/Support/ConvertUTF.h"24#include "llvm/Support/JSON.h"25#include "llvm/Support/Path.h"2627#include <optional>28#include <string>29#include <utility>3031using namespace clang;32using namespace llvm;3334using clang::detail::SarifArtifact;35using clang::detail::SarifArtifactLocation;3637static StringRef getFileName(FileEntryRef FE) {38StringRef Filename = FE.getFileEntry().tryGetRealPathName();39if (Filename.empty())40Filename = FE.getName();41return Filename;42}43/// \name URI44/// @{4546/// \internal47/// \brief48/// Return the RFC3986 encoding of the input character.49///50/// \param C Character to encode to RFC3986.51///52/// \return The RFC3986 representation of \c C.53static std::string percentEncodeURICharacter(char C) {54// RFC 3986 claims alpha, numeric, and this handful of55// characters are not reserved for the path component and56// should be written out directly. Otherwise, percent57// encode the character and write that out instead of the58// reserved character.59if (llvm::isAlnum(C) || StringRef("-._~:@!$&'()*+,;=").contains(C))60return std::string(&C, 1);61return "%" + llvm::toHex(StringRef(&C, 1));62}6364/// \internal65/// \brief Return a URI representing the given file name.66///67/// \param Filename The filename to be represented as URI.68///69/// \return RFC3986 URI representing the input file name.70static std::string fileNameToURI(StringRef Filename) {71SmallString<32> Ret = StringRef("file://");7273// Get the root name to see if it has a URI authority.74StringRef Root = sys::path::root_name(Filename);75if (Root.starts_with("//")) {76// There is an authority, so add it to the URI.77Ret += Root.drop_front(2).str();78} else if (!Root.empty()) {79// There is no authority, so end the component and add the root to the URI.80Ret += Twine("/" + Root).str();81}8283auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);84assert(Iter != End && "Expected there to be a non-root path component.");85// Add the rest of the path components, encoding any reserved characters;86// we skip past the first path component, as it was handled it above.87for (StringRef Component : llvm::make_range(++Iter, End)) {88// For reasons unknown to me, we may get a backslash with Windows native89// paths for the initial backslash following the drive component, which90// we need to ignore as a URI path part.91if (Component == "\\")92continue;9394// Add the separator between the previous path part and the one being95// currently processed.96Ret += "/";9798// URI encode the part.99for (char C : Component) {100Ret += percentEncodeURICharacter(C);101}102}103104return std::string(Ret);105}106/// @}107108/// \brief Calculate the column position expressed in the number of UTF-8 code109/// points from column start to the source location110///111/// \param Loc The source location whose column needs to be calculated.112/// \param TokenLen Optional hint for when the token is multiple bytes long.113///114/// \return The column number as a UTF-8 aware byte offset from column start to115/// the effective source location.116static unsigned int adjustColumnPos(FullSourceLoc Loc,117unsigned int TokenLen = 0) {118assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");119120std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();121std::optional<MemoryBufferRef> Buf =122Loc.getManager().getBufferOrNone(LocInfo.first);123assert(Buf && "got an invalid buffer for the location's file");124assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&125"token extends past end of buffer?");126127// Adjust the offset to be the start of the line, since we'll be counting128// Unicode characters from there until our column offset.129unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);130unsigned int Ret = 1;131while (Off < (LocInfo.second + TokenLen)) {132Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);133Ret++;134}135136return Ret;137}138139/// \name SARIF Utilities140/// @{141142/// \internal143json::Object createMessage(StringRef Text) {144return json::Object{{"text", Text.str()}};145}146147/// \internal148/// \pre CharSourceRange must be a token range149static json::Object createTextRegion(const SourceManager &SM,150const CharSourceRange &R) {151FullSourceLoc BeginCharLoc{R.getBegin(), SM};152FullSourceLoc EndCharLoc{R.getEnd(), SM};153json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},154{"startColumn", adjustColumnPos(BeginCharLoc)}};155156if (BeginCharLoc == EndCharLoc) {157Region["endColumn"] = adjustColumnPos(BeginCharLoc);158} else {159Region["endLine"] = EndCharLoc.getExpansionLineNumber();160Region["endColumn"] = adjustColumnPos(EndCharLoc);161}162return Region;163}164165static json::Object createLocation(json::Object &&PhysicalLocation,166StringRef Message = "") {167json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};168if (!Message.empty())169Ret.insert({"message", createMessage(Message)});170return Ret;171}172173static StringRef importanceToStr(ThreadFlowImportance I) {174switch (I) {175case ThreadFlowImportance::Important:176return "important";177case ThreadFlowImportance::Essential:178return "essential";179case ThreadFlowImportance::Unimportant:180return "unimportant";181}182llvm_unreachable("Fully covered switch is not so fully covered");183}184185static StringRef resultLevelToStr(SarifResultLevel R) {186switch (R) {187case SarifResultLevel::None:188return "none";189case SarifResultLevel::Note:190return "note";191case SarifResultLevel::Warning:192return "warning";193case SarifResultLevel::Error:194return "error";195}196llvm_unreachable("Potentially un-handled SarifResultLevel. "197"Is the switch not fully covered?");198}199200static json::Object201createThreadFlowLocation(json::Object &&Location,202const ThreadFlowImportance &Importance) {203return json::Object{{"location", std::move(Location)},204{"importance", importanceToStr(Importance)}};205}206/// @}207208json::Object209SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {210assert(R.isValid() &&211"Cannot create a physicalLocation from invalid SourceRange!");212assert(R.isCharRange() &&213"Cannot create a physicalLocation from a token range!");214FullSourceLoc Start{R.getBegin(), SourceMgr};215OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef();216assert(FE && "Diagnostic does not exist within a valid file!");217218const std::string &FileURI = fileNameToURI(getFileName(*FE));219auto I = CurrentArtifacts.find(FileURI);220221if (I == CurrentArtifacts.end()) {222uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());223const SarifArtifactLocation &Location =224SarifArtifactLocation::create(FileURI).setIndex(Idx);225const SarifArtifact &Artifact = SarifArtifact::create(Location)226.setRoles({"resultFile"})227.setLength(FE->getSize())228.setMimeType("text/plain");229auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});230// If inserted, ensure the original iterator points to the newly inserted231// element, so it can be used downstream.232if (StatusIter.second)233I = StatusIter.first;234}235assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");236const SarifArtifactLocation &Location = I->second.Location;237json::Object ArtifactLocationObject{{"uri", Location.URI}};238if (Location.Index.has_value())239ArtifactLocationObject["index"] = *Location.Index;240return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},241{"region", createTextRegion(SourceMgr, R)}}};242}243244json::Object &SarifDocumentWriter::getCurrentTool() {245assert(!Closed && "SARIF Document is closed. "246"Need to call createRun() before using getcurrentTool!");247248// Since Closed = false here, expect there to be at least 1 Run, anything249// else is an invalid state.250assert(!Runs.empty() && "There are no runs associated with the document!");251252return *Runs.back().getAsObject()->get("tool")->getAsObject();253}254255void SarifDocumentWriter::reset() {256CurrentRules.clear();257CurrentArtifacts.clear();258}259260void SarifDocumentWriter::endRun() {261// Exit early if trying to close a closed Document.262if (Closed) {263reset();264return;265}266267// Since Closed = false here, expect there to be at least 1 Run, anything268// else is an invalid state.269assert(!Runs.empty() && "There are no runs associated with the document!");270271// Flush all the rules.272json::Object &Tool = getCurrentTool();273json::Array Rules;274for (const SarifRule &R : CurrentRules) {275json::Object Config{276{"enabled", R.DefaultConfiguration.Enabled},277{"level", resultLevelToStr(R.DefaultConfiguration.Level)},278{"rank", R.DefaultConfiguration.Rank}};279json::Object Rule{280{"name", R.Name},281{"id", R.Id},282{"fullDescription", json::Object{{"text", R.Description}}},283{"defaultConfiguration", std::move(Config)}};284if (!R.HelpURI.empty())285Rule["helpUri"] = R.HelpURI;286Rules.emplace_back(std::move(Rule));287}288json::Object &Driver = *Tool.getObject("driver");289Driver["rules"] = std::move(Rules);290291// Flush all the artifacts.292json::Object &Run = getCurrentRun();293json::Array *Artifacts = Run.getArray("artifacts");294SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec;295for (const auto &[K, V] : CurrentArtifacts)296Vec.emplace_back(K, V);297llvm::sort(Vec, llvm::less_first());298for (const auto &[_, A] : Vec) {299json::Object Loc{{"uri", A.Location.URI}};300if (A.Location.Index.has_value()) {301Loc["index"] = static_cast<int64_t>(*A.Location.Index);302}303json::Object Artifact;304Artifact["location"] = std::move(Loc);305if (A.Length.has_value())306Artifact["length"] = static_cast<int64_t>(*A.Length);307if (!A.Roles.empty())308Artifact["roles"] = json::Array(A.Roles);309if (!A.MimeType.empty())310Artifact["mimeType"] = A.MimeType;311if (A.Offset.has_value())312Artifact["offset"] = *A.Offset;313Artifacts->push_back(json::Value(std::move(Artifact)));314}315316// Clear, reset temporaries before next run.317reset();318319// Mark the document as closed.320Closed = true;321}322323json::Array324SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {325json::Object Ret{{"locations", json::Array{}}};326json::Array Locs;327for (const auto &ThreadFlow : ThreadFlows) {328json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);329json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);330Locs.emplace_back(331createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));332}333Ret["locations"] = std::move(Locs);334return json::Array{std::move(Ret)};335}336337json::Object338SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {339return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};340}341342void SarifDocumentWriter::createRun(StringRef ShortToolName,343StringRef LongToolName,344StringRef ToolVersion) {345// Clear resources associated with a previous run.346endRun();347348// Signify a new run has begun.349Closed = false;350351json::Object Tool{352{"driver",353json::Object{{"name", ShortToolName},354{"fullName", LongToolName},355{"language", "en-US"},356{"version", ToolVersion},357{"informationUri",358"https://clang.llvm.org/docs/UsersManual.html"}}}};359json::Object TheRun{{"tool", std::move(Tool)},360{"results", {}},361{"artifacts", {}},362{"columnKind", "unicodeCodePoints"}};363Runs.emplace_back(std::move(TheRun));364}365366json::Object &SarifDocumentWriter::getCurrentRun() {367assert(!Closed &&368"SARIF Document is closed. "369"Can only getCurrentRun() if document is opened via createRun(), "370"create a run first");371372// Since Closed = false here, expect there to be at least 1 Run, anything373// else is an invalid state.374assert(!Runs.empty() && "There are no runs associated with the document!");375return *Runs.back().getAsObject();376}377378size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {379size_t Ret = CurrentRules.size();380CurrentRules.emplace_back(Rule);381return Ret;382}383384void SarifDocumentWriter::appendResult(const SarifResult &Result) {385size_t RuleIdx = Result.RuleIdx;386assert(RuleIdx < CurrentRules.size() &&387"Trying to reference a rule that doesn't exist");388const SarifRule &Rule = CurrentRules[RuleIdx];389assert(Rule.DefaultConfiguration.Enabled &&390"Cannot add a result referencing a disabled Rule");391json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},392{"ruleIndex", static_cast<int64_t>(RuleIdx)},393{"ruleId", Rule.Id}};394if (!Result.Locations.empty()) {395json::Array Locs;396for (auto &Range : Result.Locations) {397Locs.emplace_back(createLocation(createPhysicalLocation(Range)));398}399Ret["locations"] = std::move(Locs);400}401if (!Result.ThreadFlows.empty())402Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};403404Ret["level"] = resultLevelToStr(405Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));406407json::Object &Run = getCurrentRun();408json::Array *Results = Run.getArray("results");409Results->emplace_back(std::move(Ret));410}411412json::Object SarifDocumentWriter::createDocument() {413// Flush all temporaries to their destinations if needed.414endRun();415416json::Object Doc{417{"$schema", SchemaURI},418{"version", SchemaVersion},419};420if (!Runs.empty())421Doc["runs"] = json::Array(Runs);422return Doc;423}424425426