Path: blob/main/contrib/llvm-project/clang/lib/Tooling/ASTDiff/ASTDiff.cpp
35266 views
//===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains definitons for the AST differencing interface.9//10//===----------------------------------------------------------------------===//1112#include "clang/Tooling/ASTDiff/ASTDiff.h"13#include "clang/AST/ParentMapContext.h"14#include "clang/AST/RecursiveASTVisitor.h"15#include "clang/Basic/SourceManager.h"16#include "clang/Lex/Lexer.h"17#include "llvm/ADT/PriorityQueue.h"1819#include <limits>20#include <memory>21#include <optional>22#include <unordered_set>2324using namespace llvm;25using namespace clang;2627namespace clang {28namespace diff {2930namespace {31/// Maps nodes of the left tree to ones on the right, and vice versa.32class Mapping {33public:34Mapping() = default;35Mapping(Mapping &&Other) = default;36Mapping &operator=(Mapping &&Other) = default;3738Mapping(size_t Size) {39SrcToDst = std::make_unique<NodeId[]>(Size);40DstToSrc = std::make_unique<NodeId[]>(Size);41}4243void link(NodeId Src, NodeId Dst) {44SrcToDst[Src] = Dst, DstToSrc[Dst] = Src;45}4647NodeId getDst(NodeId Src) const { return SrcToDst[Src]; }48NodeId getSrc(NodeId Dst) const { return DstToSrc[Dst]; }49bool hasSrc(NodeId Src) const { return getDst(Src).isValid(); }50bool hasDst(NodeId Dst) const { return getSrc(Dst).isValid(); }5152private:53std::unique_ptr<NodeId[]> SrcToDst, DstToSrc;54};55} // end anonymous namespace5657class ASTDiff::Impl {58public:59SyntaxTree::Impl &T1, &T2;60Mapping TheMapping;6162Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2,63const ComparisonOptions &Options);6465/// Matches nodes one-by-one based on their similarity.66void computeMapping();6768// Compute Change for each node based on similarity.69void computeChangeKinds(Mapping &M);7071NodeId getMapped(const std::unique_ptr<SyntaxTree::Impl> &Tree,72NodeId Id) const {73if (&*Tree == &T1)74return TheMapping.getDst(Id);75assert(&*Tree == &T2 && "Invalid tree.");76return TheMapping.getSrc(Id);77}7879private:80// Returns true if the two subtrees are identical.81bool identical(NodeId Id1, NodeId Id2) const;8283// Returns false if the nodes must not be mached.84bool isMatchingPossible(NodeId Id1, NodeId Id2) const;8586// Returns true if the nodes' parents are matched.87bool haveSameParents(const Mapping &M, NodeId Id1, NodeId Id2) const;8889// Uses an optimal albeit slow algorithm to compute a mapping between two90// subtrees, but only if both have fewer nodes than MaxSize.91void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const;9293// Computes the ratio of common descendants between the two nodes.94// Descendants are only considered to be equal when they are mapped in M.95double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;9697// Returns the node that has the highest degree of similarity.98NodeId findCandidate(const Mapping &M, NodeId Id1) const;99100// Returns a mapping of identical subtrees.101Mapping matchTopDown() const;102103// Tries to match any yet unmapped nodes, in a bottom-up fashion.104void matchBottomUp(Mapping &M) const;105106const ComparisonOptions &Options;107108friend class ZhangShashaMatcher;109};110111/// Represents the AST of a TranslationUnit.112class SyntaxTree::Impl {113public:114Impl(SyntaxTree *Parent, ASTContext &AST);115/// Constructs a tree from an AST node.116Impl(SyntaxTree *Parent, Decl *N, ASTContext &AST);117Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST);118template <class T>119Impl(SyntaxTree *Parent,120std::enable_if_t<std::is_base_of_v<Stmt, T>, T> *Node, ASTContext &AST)121: Impl(Parent, dyn_cast<Stmt>(Node), AST) {}122template <class T>123Impl(SyntaxTree *Parent,124std::enable_if_t<std::is_base_of_v<Decl, T>, T> *Node, ASTContext &AST)125: Impl(Parent, dyn_cast<Decl>(Node), AST) {}126127SyntaxTree *Parent;128ASTContext &AST;129PrintingPolicy TypePP;130/// Nodes in preorder.131std::vector<Node> Nodes;132std::vector<NodeId> Leaves;133// Maps preorder indices to postorder ones.134std::vector<int> PostorderIds;135std::vector<NodeId> NodesBfs;136137int getSize() const { return Nodes.size(); }138NodeId getRootId() const { return 0; }139PreorderIterator begin() const { return getRootId(); }140PreorderIterator end() const { return getSize(); }141142const Node &getNode(NodeId Id) const { return Nodes[Id]; }143Node &getMutableNode(NodeId Id) { return Nodes[Id]; }144bool isValidNodeId(NodeId Id) const { return Id >= 0 && Id < getSize(); }145void addNode(Node &N) { Nodes.push_back(N); }146int getNumberOfDescendants(NodeId Id) const;147bool isInSubtree(NodeId Id, NodeId SubtreeRoot) const;148int findPositionInParent(NodeId Id, bool Shifted = false) const;149150std::string getRelativeName(const NamedDecl *ND,151const DeclContext *Context) const;152std::string getRelativeName(const NamedDecl *ND) const;153154std::string getNodeValue(NodeId Id) const;155std::string getNodeValue(const Node &Node) const;156std::string getDeclValue(const Decl *D) const;157std::string getStmtValue(const Stmt *S) const;158159private:160void initTree();161void setLeftMostDescendants();162};163164static bool isSpecializedNodeExcluded(const Decl *D) { return D->isImplicit(); }165static bool isSpecializedNodeExcluded(const Stmt *S) { return false; }166static bool isSpecializedNodeExcluded(CXXCtorInitializer *I) {167return !I->isWritten();168}169170template <class T>171static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) {172if (!N)173return true;174SourceLocation SLoc = N->getSourceRange().getBegin();175if (SLoc.isValid()) {176// Ignore everything from other files.177if (!SrcMgr.isInMainFile(SLoc))178return true;179// Ignore macros.180if (SLoc != SrcMgr.getSpellingLoc(SLoc))181return true;182}183return isSpecializedNodeExcluded(N);184}185186namespace {187// Sets Height, Parent and Children for each node.188struct PreorderVisitor : public RecursiveASTVisitor<PreorderVisitor> {189int Id = 0, Depth = 0;190NodeId Parent;191SyntaxTree::Impl &Tree;192193PreorderVisitor(SyntaxTree::Impl &Tree) : Tree(Tree) {}194195template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) {196NodeId MyId = Id;197Tree.Nodes.emplace_back();198Node &N = Tree.getMutableNode(MyId);199N.Parent = Parent;200N.Depth = Depth;201N.ASTNode = DynTypedNode::create(*ASTNode);202assert(!N.ASTNode.getNodeKind().isNone() &&203"Expected nodes to have a valid kind.");204if (Parent.isValid()) {205Node &P = Tree.getMutableNode(Parent);206P.Children.push_back(MyId);207}208Parent = MyId;209++Id;210++Depth;211return std::make_tuple(MyId, Tree.getNode(MyId).Parent);212}213void PostTraverse(std::tuple<NodeId, NodeId> State) {214NodeId MyId, PreviousParent;215std::tie(MyId, PreviousParent) = State;216assert(MyId.isValid() && "Expecting to only traverse valid nodes.");217Parent = PreviousParent;218--Depth;219Node &N = Tree.getMutableNode(MyId);220N.RightMostDescendant = Id - 1;221assert(N.RightMostDescendant >= 0 &&222N.RightMostDescendant < Tree.getSize() &&223"Rightmost descendant must be a valid tree node.");224if (N.isLeaf())225Tree.Leaves.push_back(MyId);226N.Height = 1;227for (NodeId Child : N.Children)228N.Height = std::max(N.Height, 1 + Tree.getNode(Child).Height);229}230bool TraverseDecl(Decl *D) {231if (isNodeExcluded(Tree.AST.getSourceManager(), D))232return true;233auto SavedState = PreTraverse(D);234RecursiveASTVisitor<PreorderVisitor>::TraverseDecl(D);235PostTraverse(SavedState);236return true;237}238bool TraverseStmt(Stmt *S) {239if (auto *E = dyn_cast_or_null<Expr>(S))240S = E->IgnoreImplicit();241if (isNodeExcluded(Tree.AST.getSourceManager(), S))242return true;243auto SavedState = PreTraverse(S);244RecursiveASTVisitor<PreorderVisitor>::TraverseStmt(S);245PostTraverse(SavedState);246return true;247}248bool TraverseType(QualType T) { return true; }249bool TraverseConstructorInitializer(CXXCtorInitializer *Init) {250if (isNodeExcluded(Tree.AST.getSourceManager(), Init))251return true;252auto SavedState = PreTraverse(Init);253RecursiveASTVisitor<PreorderVisitor>::TraverseConstructorInitializer(Init);254PostTraverse(SavedState);255return true;256}257};258} // end anonymous namespace259260SyntaxTree::Impl::Impl(SyntaxTree *Parent, ASTContext &AST)261: Parent(Parent), AST(AST), TypePP(AST.getLangOpts()) {262TypePP.AnonymousTagLocations = false;263}264265SyntaxTree::Impl::Impl(SyntaxTree *Parent, Decl *N, ASTContext &AST)266: Impl(Parent, AST) {267PreorderVisitor PreorderWalker(*this);268PreorderWalker.TraverseDecl(N);269initTree();270}271272SyntaxTree::Impl::Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST)273: Impl(Parent, AST) {274PreorderVisitor PreorderWalker(*this);275PreorderWalker.TraverseStmt(N);276initTree();277}278279static std::vector<NodeId> getSubtreePostorder(const SyntaxTree::Impl &Tree,280NodeId Root) {281std::vector<NodeId> Postorder;282std::function<void(NodeId)> Traverse = [&](NodeId Id) {283const Node &N = Tree.getNode(Id);284for (NodeId Child : N.Children)285Traverse(Child);286Postorder.push_back(Id);287};288Traverse(Root);289return Postorder;290}291292static std::vector<NodeId> getSubtreeBfs(const SyntaxTree::Impl &Tree,293NodeId Root) {294std::vector<NodeId> Ids;295size_t Expanded = 0;296Ids.push_back(Root);297while (Expanded < Ids.size())298for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children)299Ids.push_back(Child);300return Ids;301}302303void SyntaxTree::Impl::initTree() {304setLeftMostDescendants();305int PostorderId = 0;306PostorderIds.resize(getSize());307std::function<void(NodeId)> PostorderTraverse = [&](NodeId Id) {308for (NodeId Child : getNode(Id).Children)309PostorderTraverse(Child);310PostorderIds[Id] = PostorderId;311++PostorderId;312};313PostorderTraverse(getRootId());314NodesBfs = getSubtreeBfs(*this, getRootId());315}316317void SyntaxTree::Impl::setLeftMostDescendants() {318for (NodeId Leaf : Leaves) {319getMutableNode(Leaf).LeftMostDescendant = Leaf;320NodeId Parent, Cur = Leaf;321while ((Parent = getNode(Cur).Parent).isValid() &&322getNode(Parent).Children[0] == Cur) {323Cur = Parent;324getMutableNode(Cur).LeftMostDescendant = Leaf;325}326}327}328329int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id) const {330return getNode(Id).RightMostDescendant - Id + 1;331}332333bool SyntaxTree::Impl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const {334return Id >= SubtreeRoot && Id <= getNode(SubtreeRoot).RightMostDescendant;335}336337int SyntaxTree::Impl::findPositionInParent(NodeId Id, bool Shifted) const {338NodeId Parent = getNode(Id).Parent;339if (Parent.isInvalid())340return 0;341const auto &Siblings = getNode(Parent).Children;342int Position = 0;343for (size_t I = 0, E = Siblings.size(); I < E; ++I) {344if (Shifted)345Position += getNode(Siblings[I]).Shift;346if (Siblings[I] == Id) {347Position += I;348return Position;349}350}351llvm_unreachable("Node not found in parent's children.");352}353354// Returns the qualified name of ND. If it is subordinate to Context,355// then the prefix of the latter is removed from the returned value.356std::string357SyntaxTree::Impl::getRelativeName(const NamedDecl *ND,358const DeclContext *Context) const {359std::string Val = ND->getQualifiedNameAsString();360std::string ContextPrefix;361if (!Context)362return Val;363if (auto *Namespace = dyn_cast<NamespaceDecl>(Context))364ContextPrefix = Namespace->getQualifiedNameAsString();365else if (auto *Record = dyn_cast<RecordDecl>(Context))366ContextPrefix = Record->getQualifiedNameAsString();367else if (AST.getLangOpts().CPlusPlus11)368if (auto *Tag = dyn_cast<TagDecl>(Context))369ContextPrefix = Tag->getQualifiedNameAsString();370// Strip the qualifier, if Val refers to something in the current scope.371// But leave one leading ':' in place, so that we know that this is a372// relative path.373if (!ContextPrefix.empty() && StringRef(Val).starts_with(ContextPrefix))374Val = Val.substr(ContextPrefix.size() + 1);375return Val;376}377378std::string SyntaxTree::Impl::getRelativeName(const NamedDecl *ND) const {379return getRelativeName(ND, ND->getDeclContext());380}381382static const DeclContext *getEnclosingDeclContext(ASTContext &AST,383const Stmt *S) {384while (S) {385const auto &Parents = AST.getParents(*S);386if (Parents.empty())387return nullptr;388const auto &P = Parents[0];389if (const auto *D = P.get<Decl>())390return D->getDeclContext();391S = P.get<Stmt>();392}393return nullptr;394}395396static std::string getInitializerValue(const CXXCtorInitializer *Init,397const PrintingPolicy &TypePP) {398if (Init->isAnyMemberInitializer())399return std::string(Init->getAnyMember()->getName());400if (Init->isBaseInitializer())401return QualType(Init->getBaseClass(), 0).getAsString(TypePP);402if (Init->isDelegatingInitializer())403return Init->getTypeSourceInfo()->getType().getAsString(TypePP);404llvm_unreachable("Unknown initializer type");405}406407std::string SyntaxTree::Impl::getNodeValue(NodeId Id) const {408return getNodeValue(getNode(Id));409}410411std::string SyntaxTree::Impl::getNodeValue(const Node &N) const {412const DynTypedNode &DTN = N.ASTNode;413if (auto *S = DTN.get<Stmt>())414return getStmtValue(S);415if (auto *D = DTN.get<Decl>())416return getDeclValue(D);417if (auto *Init = DTN.get<CXXCtorInitializer>())418return getInitializerValue(Init, TypePP);419llvm_unreachable("Fatal: unhandled AST node.\n");420}421422std::string SyntaxTree::Impl::getDeclValue(const Decl *D) const {423std::string Value;424if (auto *V = dyn_cast<ValueDecl>(D))425return getRelativeName(V) + "(" + V->getType().getAsString(TypePP) + ")";426if (auto *N = dyn_cast<NamedDecl>(D))427Value += getRelativeName(N) + ";";428if (auto *T = dyn_cast<TypedefNameDecl>(D))429return Value + T->getUnderlyingType().getAsString(TypePP) + ";";430if (auto *T = dyn_cast<TypeDecl>(D))431if (T->getTypeForDecl())432Value +=433T->getTypeForDecl()->getCanonicalTypeInternal().getAsString(TypePP) +434";";435if (auto *U = dyn_cast<UsingDirectiveDecl>(D))436return std::string(U->getNominatedNamespace()->getName());437if (auto *A = dyn_cast<AccessSpecDecl>(D)) {438CharSourceRange Range(A->getSourceRange(), false);439return std::string(440Lexer::getSourceText(Range, AST.getSourceManager(), AST.getLangOpts()));441}442return Value;443}444445std::string SyntaxTree::Impl::getStmtValue(const Stmt *S) const {446if (auto *U = dyn_cast<UnaryOperator>(S))447return std::string(UnaryOperator::getOpcodeStr(U->getOpcode()));448if (auto *B = dyn_cast<BinaryOperator>(S))449return std::string(B->getOpcodeStr());450if (auto *M = dyn_cast<MemberExpr>(S))451return getRelativeName(M->getMemberDecl());452if (auto *I = dyn_cast<IntegerLiteral>(S)) {453SmallString<256> Str;454I->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false);455return std::string(Str);456}457if (auto *F = dyn_cast<FloatingLiteral>(S)) {458SmallString<256> Str;459F->getValue().toString(Str);460return std::string(Str);461}462if (auto *D = dyn_cast<DeclRefExpr>(S))463return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));464if (auto *String = dyn_cast<StringLiteral>(S))465return std::string(String->getString());466if (auto *B = dyn_cast<CXXBoolLiteralExpr>(S))467return B->getValue() ? "true" : "false";468return "";469}470471/// Identifies a node in a subtree by its postorder offset, starting at 1.472struct SNodeId {473int Id = 0;474475explicit SNodeId(int Id) : Id(Id) {}476explicit SNodeId() = default;477478operator int() const { return Id; }479SNodeId &operator++() { return ++Id, *this; }480SNodeId &operator--() { return --Id, *this; }481SNodeId operator+(int Other) const { return SNodeId(Id + Other); }482};483484class Subtree {485private:486/// The parent tree.487const SyntaxTree::Impl &Tree;488/// Maps SNodeIds to original ids.489std::vector<NodeId> RootIds;490/// Maps subtree nodes to their leftmost descendants wtihin the subtree.491std::vector<SNodeId> LeftMostDescendants;492493public:494std::vector<SNodeId> KeyRoots;495496Subtree(const SyntaxTree::Impl &Tree, NodeId SubtreeRoot) : Tree(Tree) {497RootIds = getSubtreePostorder(Tree, SubtreeRoot);498int NumLeaves = setLeftMostDescendants();499computeKeyRoots(NumLeaves);500}501int getSize() const { return RootIds.size(); }502NodeId getIdInRoot(SNodeId Id) const {503assert(Id > 0 && Id <= getSize() && "Invalid subtree node index.");504return RootIds[Id - 1];505}506const Node &getNode(SNodeId Id) const {507return Tree.getNode(getIdInRoot(Id));508}509SNodeId getLeftMostDescendant(SNodeId Id) const {510assert(Id > 0 && Id <= getSize() && "Invalid subtree node index.");511return LeftMostDescendants[Id - 1];512}513/// Returns the postorder index of the leftmost descendant in the subtree.514NodeId getPostorderOffset() const {515return Tree.PostorderIds[getIdInRoot(SNodeId(1))];516}517std::string getNodeValue(SNodeId Id) const {518return Tree.getNodeValue(getIdInRoot(Id));519}520521private:522/// Returns the number of leafs in the subtree.523int setLeftMostDescendants() {524int NumLeaves = 0;525LeftMostDescendants.resize(getSize());526for (int I = 0; I < getSize(); ++I) {527SNodeId SI(I + 1);528const Node &N = getNode(SI);529NumLeaves += N.isLeaf();530assert(I == Tree.PostorderIds[getIdInRoot(SI)] - getPostorderOffset() &&531"Postorder traversal in subtree should correspond to traversal in "532"the root tree by a constant offset.");533LeftMostDescendants[I] = SNodeId(Tree.PostorderIds[N.LeftMostDescendant] -534getPostorderOffset());535}536return NumLeaves;537}538void computeKeyRoots(int Leaves) {539KeyRoots.resize(Leaves);540std::unordered_set<int> Visited;541int K = Leaves - 1;542for (SNodeId I(getSize()); I > 0; --I) {543SNodeId LeftDesc = getLeftMostDescendant(I);544if (Visited.count(LeftDesc))545continue;546assert(K >= 0 && "K should be non-negative");547KeyRoots[K] = I;548Visited.insert(LeftDesc);549--K;550}551}552};553554/// Implementation of Zhang and Shasha's Algorithm for tree edit distance.555/// Computes an optimal mapping between two trees using only insertion,556/// deletion and update as edit actions (similar to the Levenshtein distance).557class ZhangShashaMatcher {558const ASTDiff::Impl &DiffImpl;559Subtree S1;560Subtree S2;561std::unique_ptr<std::unique_ptr<double[]>[]> TreeDist, ForestDist;562563public:564ZhangShashaMatcher(const ASTDiff::Impl &DiffImpl, const SyntaxTree::Impl &T1,565const SyntaxTree::Impl &T2, NodeId Id1, NodeId Id2)566: DiffImpl(DiffImpl), S1(T1, Id1), S2(T2, Id2) {567TreeDist = std::make_unique<std::unique_ptr<double[]>[]>(568size_t(S1.getSize()) + 1);569ForestDist = std::make_unique<std::unique_ptr<double[]>[]>(570size_t(S1.getSize()) + 1);571for (int I = 0, E = S1.getSize() + 1; I < E; ++I) {572TreeDist[I] = std::make_unique<double[]>(size_t(S2.getSize()) + 1);573ForestDist[I] = std::make_unique<double[]>(size_t(S2.getSize()) + 1);574}575}576577std::vector<std::pair<NodeId, NodeId>> getMatchingNodes() {578std::vector<std::pair<NodeId, NodeId>> Matches;579std::vector<std::pair<SNodeId, SNodeId>> TreePairs;580581computeTreeDist();582583bool RootNodePair = true;584585TreePairs.emplace_back(SNodeId(S1.getSize()), SNodeId(S2.getSize()));586587while (!TreePairs.empty()) {588SNodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col;589std::tie(LastRow, LastCol) = TreePairs.back();590TreePairs.pop_back();591592if (!RootNodePair) {593computeForestDist(LastRow, LastCol);594}595596RootNodePair = false;597598FirstRow = S1.getLeftMostDescendant(LastRow);599FirstCol = S2.getLeftMostDescendant(LastCol);600601Row = LastRow;602Col = LastCol;603604while (Row > FirstRow || Col > FirstCol) {605if (Row > FirstRow &&606ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) {607--Row;608} else if (Col > FirstCol &&609ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) {610--Col;611} else {612SNodeId LMD1 = S1.getLeftMostDescendant(Row);613SNodeId LMD2 = S2.getLeftMostDescendant(Col);614if (LMD1 == S1.getLeftMostDescendant(LastRow) &&615LMD2 == S2.getLeftMostDescendant(LastCol)) {616NodeId Id1 = S1.getIdInRoot(Row);617NodeId Id2 = S2.getIdInRoot(Col);618assert(DiffImpl.isMatchingPossible(Id1, Id2) &&619"These nodes must not be matched.");620Matches.emplace_back(Id1, Id2);621--Row;622--Col;623} else {624TreePairs.emplace_back(Row, Col);625Row = LMD1;626Col = LMD2;627}628}629}630}631return Matches;632}633634private:635/// We use a simple cost model for edit actions, which seems good enough.636/// Simple cost model for edit actions. This seems to make the matching637/// algorithm perform reasonably well.638/// The values range between 0 and 1, or infinity if this edit action should639/// always be avoided.640static constexpr double DeletionCost = 1;641static constexpr double InsertionCost = 1;642643double getUpdateCost(SNodeId Id1, SNodeId Id2) {644if (!DiffImpl.isMatchingPossible(S1.getIdInRoot(Id1), S2.getIdInRoot(Id2)))645return std::numeric_limits<double>::max();646return S1.getNodeValue(Id1) != S2.getNodeValue(Id2);647}648649void computeTreeDist() {650for (SNodeId Id1 : S1.KeyRoots)651for (SNodeId Id2 : S2.KeyRoots)652computeForestDist(Id1, Id2);653}654655void computeForestDist(SNodeId Id1, SNodeId Id2) {656assert(Id1 > 0 && Id2 > 0 && "Expecting offsets greater than 0.");657SNodeId LMD1 = S1.getLeftMostDescendant(Id1);658SNodeId LMD2 = S2.getLeftMostDescendant(Id2);659660ForestDist[LMD1][LMD2] = 0;661for (SNodeId D1 = LMD1 + 1; D1 <= Id1; ++D1) {662ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost;663for (SNodeId D2 = LMD2 + 1; D2 <= Id2; ++D2) {664ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost;665SNodeId DLMD1 = S1.getLeftMostDescendant(D1);666SNodeId DLMD2 = S2.getLeftMostDescendant(D2);667if (DLMD1 == LMD1 && DLMD2 == LMD2) {668double UpdateCost = getUpdateCost(D1, D2);669ForestDist[D1][D2] =670std::min({ForestDist[D1 - 1][D2] + DeletionCost,671ForestDist[D1][D2 - 1] + InsertionCost,672ForestDist[D1 - 1][D2 - 1] + UpdateCost});673TreeDist[D1][D2] = ForestDist[D1][D2];674} else {675ForestDist[D1][D2] =676std::min({ForestDist[D1 - 1][D2] + DeletionCost,677ForestDist[D1][D2 - 1] + InsertionCost,678ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]});679}680}681}682}683};684685ASTNodeKind Node::getType() const { return ASTNode.getNodeKind(); }686687StringRef Node::getTypeLabel() const { return getType().asStringRef(); }688689std::optional<std::string> Node::getQualifiedIdentifier() const {690if (auto *ND = ASTNode.get<NamedDecl>()) {691if (ND->getDeclName().isIdentifier())692return ND->getQualifiedNameAsString();693}694return std::nullopt;695}696697std::optional<StringRef> Node::getIdentifier() const {698if (auto *ND = ASTNode.get<NamedDecl>()) {699if (ND->getDeclName().isIdentifier())700return ND->getName();701}702return std::nullopt;703}704705namespace {706// Compares nodes by their depth.707struct HeightLess {708const SyntaxTree::Impl &Tree;709HeightLess(const SyntaxTree::Impl &Tree) : Tree(Tree) {}710bool operator()(NodeId Id1, NodeId Id2) const {711return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height;712}713};714} // end anonymous namespace715716namespace {717// Priority queue for nodes, sorted descendingly by their height.718class PriorityList {719const SyntaxTree::Impl &Tree;720HeightLess Cmp;721std::vector<NodeId> Container;722PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List;723724public:725PriorityList(const SyntaxTree::Impl &Tree)726: Tree(Tree), Cmp(Tree), List(Cmp, Container) {}727728void push(NodeId id) { List.push(id); }729730std::vector<NodeId> pop() {731int Max = peekMax();732std::vector<NodeId> Result;733if (Max == 0)734return Result;735while (peekMax() == Max) {736Result.push_back(List.top());737List.pop();738}739// TODO this is here to get a stable output, not a good heuristic740llvm::sort(Result);741return Result;742}743int peekMax() const {744if (List.empty())745return 0;746return Tree.getNode(List.top()).Height;747}748void open(NodeId Id) {749for (NodeId Child : Tree.getNode(Id).Children)750push(Child);751}752};753} // end anonymous namespace754755bool ASTDiff::Impl::identical(NodeId Id1, NodeId Id2) const {756const Node &N1 = T1.getNode(Id1);757const Node &N2 = T2.getNode(Id2);758if (N1.Children.size() != N2.Children.size() ||759!isMatchingPossible(Id1, Id2) ||760T1.getNodeValue(Id1) != T2.getNodeValue(Id2))761return false;762for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id)763if (!identical(N1.Children[Id], N2.Children[Id]))764return false;765return true;766}767768bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const {769return Options.isMatchingAllowed(T1.getNode(Id1), T2.getNode(Id2));770}771772bool ASTDiff::Impl::haveSameParents(const Mapping &M, NodeId Id1,773NodeId Id2) const {774NodeId P1 = T1.getNode(Id1).Parent;775NodeId P2 = T2.getNode(Id2).Parent;776return (P1.isInvalid() && P2.isInvalid()) ||777(P1.isValid() && P2.isValid() && M.getDst(P1) == P2);778}779780void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1,781NodeId Id2) const {782if (std::max(T1.getNumberOfDescendants(Id1), T2.getNumberOfDescendants(Id2)) >783Options.MaxSize)784return;785ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2);786std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes();787for (const auto &Tuple : R) {788NodeId Src = Tuple.first;789NodeId Dst = Tuple.second;790if (!M.hasSrc(Src) && !M.hasDst(Dst))791M.link(Src, Dst);792}793}794795double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1,796NodeId Id2) const {797int CommonDescendants = 0;798const Node &N1 = T1.getNode(Id1);799// Count the common descendants, excluding the subtree root.800for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src) {801NodeId Dst = M.getDst(Src);802CommonDescendants += int(Dst.isValid() && T2.isInSubtree(Dst, Id2));803}804// We need to subtract 1 to get the number of descendants excluding the root.805double Denominator = T1.getNumberOfDescendants(Id1) - 1 +806T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants;807// CommonDescendants is less than the size of one subtree.808assert(Denominator >= 0 && "Expected non-negative denominator.");809if (Denominator == 0)810return 0;811return CommonDescendants / Denominator;812}813814NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const {815NodeId Candidate;816double HighestSimilarity = 0.0;817for (NodeId Id2 : T2) {818if (!isMatchingPossible(Id1, Id2))819continue;820if (M.hasDst(Id2))821continue;822double Similarity = getJaccardSimilarity(M, Id1, Id2);823if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) {824HighestSimilarity = Similarity;825Candidate = Id2;826}827}828return Candidate;829}830831void ASTDiff::Impl::matchBottomUp(Mapping &M) const {832std::vector<NodeId> Postorder = getSubtreePostorder(T1, T1.getRootId());833for (NodeId Id1 : Postorder) {834if (Id1 == T1.getRootId() && !M.hasSrc(T1.getRootId()) &&835!M.hasDst(T2.getRootId())) {836if (isMatchingPossible(T1.getRootId(), T2.getRootId())) {837M.link(T1.getRootId(), T2.getRootId());838addOptimalMapping(M, T1.getRootId(), T2.getRootId());839}840break;841}842bool Matched = M.hasSrc(Id1);843const Node &N1 = T1.getNode(Id1);844bool MatchedChildren = llvm::any_of(845N1.Children, [&](NodeId Child) { return M.hasSrc(Child); });846if (Matched || !MatchedChildren)847continue;848NodeId Id2 = findCandidate(M, Id1);849if (Id2.isValid()) {850M.link(Id1, Id2);851addOptimalMapping(M, Id1, Id2);852}853}854}855856Mapping ASTDiff::Impl::matchTopDown() const {857PriorityList L1(T1);858PriorityList L2(T2);859860Mapping M(T1.getSize() + T2.getSize());861862L1.push(T1.getRootId());863L2.push(T2.getRootId());864865int Max1, Max2;866while (std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) >867Options.MinHeight) {868if (Max1 > Max2) {869for (NodeId Id : L1.pop())870L1.open(Id);871continue;872}873if (Max2 > Max1) {874for (NodeId Id : L2.pop())875L2.open(Id);876continue;877}878std::vector<NodeId> H1, H2;879H1 = L1.pop();880H2 = L2.pop();881for (NodeId Id1 : H1) {882for (NodeId Id2 : H2) {883if (identical(Id1, Id2) && !M.hasSrc(Id1) && !M.hasDst(Id2)) {884for (int I = 0, E = T1.getNumberOfDescendants(Id1); I < E; ++I)885M.link(Id1 + I, Id2 + I);886}887}888}889for (NodeId Id1 : H1) {890if (!M.hasSrc(Id1))891L1.open(Id1);892}893for (NodeId Id2 : H2) {894if (!M.hasDst(Id2))895L2.open(Id2);896}897}898return M;899}900901ASTDiff::Impl::Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2,902const ComparisonOptions &Options)903: T1(T1), T2(T2), Options(Options) {904computeMapping();905computeChangeKinds(TheMapping);906}907908void ASTDiff::Impl::computeMapping() {909TheMapping = matchTopDown();910if (Options.StopAfterTopDown)911return;912matchBottomUp(TheMapping);913}914915void ASTDiff::Impl::computeChangeKinds(Mapping &M) {916for (NodeId Id1 : T1) {917if (!M.hasSrc(Id1)) {918T1.getMutableNode(Id1).Change = Delete;919T1.getMutableNode(Id1).Shift -= 1;920}921}922for (NodeId Id2 : T2) {923if (!M.hasDst(Id2)) {924T2.getMutableNode(Id2).Change = Insert;925T2.getMutableNode(Id2).Shift -= 1;926}927}928for (NodeId Id1 : T1.NodesBfs) {929NodeId Id2 = M.getDst(Id1);930if (Id2.isInvalid())931continue;932if (!haveSameParents(M, Id1, Id2) ||933T1.findPositionInParent(Id1, true) !=934T2.findPositionInParent(Id2, true)) {935T1.getMutableNode(Id1).Shift -= 1;936T2.getMutableNode(Id2).Shift -= 1;937}938}939for (NodeId Id2 : T2.NodesBfs) {940NodeId Id1 = M.getSrc(Id2);941if (Id1.isInvalid())942continue;943Node &N1 = T1.getMutableNode(Id1);944Node &N2 = T2.getMutableNode(Id2);945if (Id1.isInvalid())946continue;947if (!haveSameParents(M, Id1, Id2) ||948T1.findPositionInParent(Id1, true) !=949T2.findPositionInParent(Id2, true)) {950N1.Change = N2.Change = Move;951}952if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) {953N1.Change = N2.Change = (N1.Change == Move ? UpdateMove : Update);954}955}956}957958ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2,959const ComparisonOptions &Options)960: DiffImpl(std::make_unique<Impl>(*T1.TreeImpl, *T2.TreeImpl, Options)) {}961962ASTDiff::~ASTDiff() = default;963964NodeId ASTDiff::getMapped(const SyntaxTree &SourceTree, NodeId Id) const {965return DiffImpl->getMapped(SourceTree.TreeImpl, Id);966}967968SyntaxTree::SyntaxTree(ASTContext &AST)969: TreeImpl(std::make_unique<SyntaxTree::Impl>(970this, AST.getTranslationUnitDecl(), AST)) {}971972SyntaxTree::~SyntaxTree() = default;973974const ASTContext &SyntaxTree::getASTContext() const { return TreeImpl->AST; }975976const Node &SyntaxTree::getNode(NodeId Id) const {977return TreeImpl->getNode(Id);978}979980int SyntaxTree::getSize() const { return TreeImpl->getSize(); }981NodeId SyntaxTree::getRootId() const { return TreeImpl->getRootId(); }982SyntaxTree::PreorderIterator SyntaxTree::begin() const {983return TreeImpl->begin();984}985SyntaxTree::PreorderIterator SyntaxTree::end() const { return TreeImpl->end(); }986987int SyntaxTree::findPositionInParent(NodeId Id) const {988return TreeImpl->findPositionInParent(Id);989}990991std::pair<unsigned, unsigned>992SyntaxTree::getSourceRangeOffsets(const Node &N) const {993const SourceManager &SrcMgr = TreeImpl->AST.getSourceManager();994SourceRange Range = N.ASTNode.getSourceRange();995SourceLocation BeginLoc = Range.getBegin();996SourceLocation EndLoc = Lexer::getLocForEndOfToken(997Range.getEnd(), /*Offset=*/0, SrcMgr, TreeImpl->AST.getLangOpts());998if (auto *ThisExpr = N.ASTNode.get<CXXThisExpr>()) {999if (ThisExpr->isImplicit())1000EndLoc = BeginLoc;1001}1002unsigned Begin = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(BeginLoc));1003unsigned End = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(EndLoc));1004return {Begin, End};1005}10061007std::string SyntaxTree::getNodeValue(NodeId Id) const {1008return TreeImpl->getNodeValue(Id);1009}10101011std::string SyntaxTree::getNodeValue(const Node &N) const {1012return TreeImpl->getNodeValue(N);1013}10141015} // end namespace diff1016} // end namespace clang101710181019