// Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
// 35271 views
//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains support for DWARF4 hashing of DIEs.9//10//===----------------------------------------------------------------------===//1112#include "DIEHash.h"13#include "ByteStreamer.h"14#include "DwarfCompileUnit.h"15#include "DwarfDebug.h"16#include "llvm/ADT/ArrayRef.h"17#include "llvm/ADT/StringRef.h"18#include "llvm/BinaryFormat/Dwarf.h"19#include "llvm/CodeGen/AsmPrinter.h"20#include "llvm/Support/Debug.h"21#include "llvm/Support/raw_ostream.h"2223using namespace llvm;2425#define DEBUG_TYPE "dwarfdebug"2627/// Grabs the string in whichever attribute is passed in and returns28/// a reference to it.29static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {30// Iterate through all the attributes until we find the one we're31// looking for, if we can't find it return an empty string.32for (const auto &V : Die.values())33if (V.getAttribute() == Attr)34return V.getDIEString().getString();3536return StringRef("");37}3839/// Adds the string in \p Str to the hash. 
This also hashes40/// a trailing NULL with the string.41void DIEHash::addString(StringRef Str) {42LLVM_DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");43Hash.update(Str);44Hash.update(ArrayRef((uint8_t)'\0'));45}4647// FIXME: The LEB128 routines are copied and only slightly modified out of48// LEB128.h.4950/// Adds the unsigned in \p Value to the hash encoded as a ULEB128.51void DIEHash::addULEB128(uint64_t Value) {52LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");53do {54uint8_t Byte = Value & 0x7f;55Value >>= 7;56if (Value != 0)57Byte |= 0x80; // Mark this byte to show that more bytes will follow.58Hash.update(Byte);59} while (Value != 0);60}6162void DIEHash::addSLEB128(int64_t Value) {63LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");64bool More;65do {66uint8_t Byte = Value & 0x7f;67Value >>= 7;68More = !((((Value == 0) && ((Byte & 0x40) == 0)) ||69((Value == -1) && ((Byte & 0x40) != 0))));70if (More)71Byte |= 0x80; // Mark this byte to show that more bytes will follow.72Hash.update(Byte);73} while (More);74}7576/// Including \p Parent adds the context of Parent to the hash..77void DIEHash::addParentContext(const DIE &Parent) {7879LLVM_DEBUG(dbgs() << "Adding parent context to hash...\n");8081// [7.27.2] For each surrounding type or namespace beginning with the82// outermost such construct...83SmallVector<const DIE *, 1> Parents;84const DIE *Cur = &Parent;85while (Cur->getParent()) {86Parents.push_back(Cur);87Cur = Cur->getParent();88}89assert(Cur->getTag() == dwarf::DW_TAG_compile_unit ||90Cur->getTag() == dwarf::DW_TAG_type_unit);9192// Reverse iterate over our list to go from the outermost construct to the93// innermost.94for (const DIE *Die : llvm::reverse(Parents)) {95// ... Append the letter "C" to the sequence...96addULEB128('C');9798// ... Followed by the DWARF tag of the construct...99addULEB128(Die->getTag());100101// ... 
Then the name, taken from the DW_AT_name attribute.102StringRef Name = getDIEStringAttr(*Die, dwarf::DW_AT_name);103LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n");104if (!Name.empty())105addString(Name);106}107}108109// Collect all of the attributes for a particular DIE in single structure.110void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {111112for (const auto &V : Die.values()) {113LLVM_DEBUG(dbgs() << "Attribute: "114<< dwarf::AttributeString(V.getAttribute())115<< " added.\n");116switch (V.getAttribute()) {117#define HANDLE_DIE_HASH_ATTR(NAME) \118case dwarf::NAME: \119Attrs.NAME = V; \120break;121#include "DIEHashAttributes.def"122default:123break;124}125}126}127128void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute,129const DIE &Entry, StringRef Name) {130// append the letter 'N'131addULEB128('N');132133// the DWARF attribute code (DW_AT_type or DW_AT_friend),134addULEB128(Attribute);135136// the context of the tag,137if (const DIE *Parent = Entry.getParent())138addParentContext(*Parent);139140// the letter 'E',141addULEB128('E');142143// and the name of the type.144addString(Name);145146// Currently DW_TAG_friends are not used by Clang, but if they do become so,147// here's the relevant spec text to implement:148//149// For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram,150// the context is omitted and the name to be used is the ABI-specific name151// of the subprogram (e.g., the mangled linker name).152}153154void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute,155unsigned DieNumber) {156// a) If T is in the list of [previously hashed types], use the letter157// 'R' as the marker158addULEB128('R');159160addULEB128(Attribute);161162// and use the unsigned LEB128 encoding of [the index of T in the163// list] as the attribute value;164addULEB128(DieNumber);165}166167void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,168const DIE &Entry) {169assert(Tag != 
dwarf::DW_TAG_friend && "No current LLVM clients emit friend "170"tags. Add support here when there's "171"a use case");172// Step 5173// If the tag in Step 3 is one of [the below tags]174if ((Tag == dwarf::DW_TAG_pointer_type ||175Tag == dwarf::DW_TAG_reference_type ||176Tag == dwarf::DW_TAG_rvalue_reference_type ||177Tag == dwarf::DW_TAG_ptr_to_member_type) &&178// and the referenced type (via the [below attributes])179// FIXME: This seems overly restrictive, and causes hash mismatches180// there's a decl/def difference in the containing type of a181// ptr_to_member_type, but it's what DWARF says, for some reason.182Attribute == dwarf::DW_AT_type) {183// ... has a DW_AT_name attribute,184StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name);185if (!Name.empty()) {186hashShallowTypeReference(Attribute, Entry, Name);187return;188}189}190191unsigned &DieNumber = Numbering[&Entry];192if (DieNumber) {193hashRepeatedTypeReference(Attribute, DieNumber);194return;195}196197// otherwise, b) use the letter 'T' as the marker, ...198addULEB128('T');199200addULEB128(Attribute);201202// ... process the type T recursively by performing Steps 2 through 7, and203// use the result as the attribute value.204DieNumber = Numbering.size();205computeHash(Entry);206}207208void DIEHash::hashRawTypeReference(const DIE &Entry) {209unsigned &DieNumber = Numbering[&Entry];210if (DieNumber) {211addULEB128('R');212addULEB128(DieNumber);213return;214}215DieNumber = Numbering.size();216addULEB128('T');217computeHash(Entry);218}219220// Hash all of the values in a block like set of values. 
This assumes that221// all of the data is going to be added as integers.222void DIEHash::hashBlockData(const DIE::const_value_range &Values) {223for (const auto &V : Values)224if (V.getType() == DIEValue::isBaseTypeRef) {225const DIE &C =226*CU->ExprRefedBaseTypes[V.getDIEBaseTypeRef().getIndex()].Die;227StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);228assert(!Name.empty() &&229"Base types referenced from DW_OP_convert should have a name");230hashNestedType(C, Name);231} else232Hash.update((uint64_t)V.getDIEInteger().getValue());233}234235// Hash the contents of a loclistptr class.236void DIEHash::hashLocList(const DIELocList &LocList) {237HashingByteStreamer Streamer(*this);238DwarfDebug &DD = *AP->getDwarfDebug();239const DebugLocStream &Locs = DD.getDebugLocs();240const DebugLocStream::List &List = Locs.getList(LocList.getValue());241for (const DebugLocStream::Entry &Entry : Locs.getEntries(List))242DD.emitDebugLocEntry(Streamer, Entry, List.CU);243}244245// Hash an individual attribute \param Attr based on the type of attribute and246// the form.247void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {248dwarf::Attribute Attribute = Value.getAttribute();249250// Other attribute values use the letter 'A' as the marker, and the value251// consists of the form code (encoded as an unsigned LEB128 value) followed by252// the encoding of the value according to the form code. To ensure253// reproducibility of the signature, the set of forms used in the signature254// computation is limited to the following: DW_FORM_sdata, DW_FORM_flag,255// DW_FORM_string, and DW_FORM_block.256257switch (Value.getType()) {258case DIEValue::isNone:259llvm_unreachable("Expected valid DIEValue");260261// 7.27 Step 3262// ... 
An attribute that refers to another type entry T is processed as263// follows:264case DIEValue::isEntry:265hashDIEEntry(Attribute, Tag, Value.getDIEEntry().getEntry());266break;267case DIEValue::isInteger: {268addULEB128('A');269addULEB128(Attribute);270switch (Value.getForm()) {271case dwarf::DW_FORM_data1:272case dwarf::DW_FORM_data2:273case dwarf::DW_FORM_data4:274case dwarf::DW_FORM_data8:275case dwarf::DW_FORM_udata:276case dwarf::DW_FORM_sdata:277addULEB128(dwarf::DW_FORM_sdata);278addSLEB128((int64_t)Value.getDIEInteger().getValue());279break;280// DW_FORM_flag_present is just flag with a value of one. We still give it a281// value so just use the value.282case dwarf::DW_FORM_flag_present:283case dwarf::DW_FORM_flag:284addULEB128(dwarf::DW_FORM_flag);285addULEB128((int64_t)Value.getDIEInteger().getValue());286break;287default:288llvm_unreachable("Unknown integer form!");289}290break;291}292case DIEValue::isString:293addULEB128('A');294addULEB128(Attribute);295addULEB128(dwarf::DW_FORM_string);296addString(Value.getDIEString().getString());297break;298case DIEValue::isInlineString:299addULEB128('A');300addULEB128(Attribute);301addULEB128(dwarf::DW_FORM_string);302addString(Value.getDIEInlineString().getString());303break;304case DIEValue::isBlock:305case DIEValue::isLoc:306case DIEValue::isLocList:307addULEB128('A');308addULEB128(Attribute);309addULEB128(dwarf::DW_FORM_block);310if (Value.getType() == DIEValue::isBlock) {311addULEB128(Value.getDIEBlock().computeSize(AP->getDwarfFormParams()));312hashBlockData(Value.getDIEBlock().values());313} else if (Value.getType() == DIEValue::isLoc) {314addULEB128(Value.getDIELoc().computeSize(AP->getDwarfFormParams()));315hashBlockData(Value.getDIELoc().values());316} else {317// We could add the block length, but that would take318// a bit of work and not add a lot of uniqueness319// to the hash in some way we could test.320hashLocList(Value.getDIELocList());321}322break;323// FIXME: It's uncertain whether or not we 
should handle this at the moment.324case DIEValue::isExpr:325case DIEValue::isLabel:326case DIEValue::isBaseTypeRef:327case DIEValue::isDelta:328case DIEValue::isAddrOffset:329llvm_unreachable("Add support for additional value types.");330}331}332333// Go through the attributes from \param Attrs in the order specified in 7.27.4334// and hash them.335void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {336#define HANDLE_DIE_HASH_ATTR(NAME) \337{ \338if (Attrs.NAME) \339hashAttribute(Attrs.NAME, Tag); \340}341#include "DIEHashAttributes.def"342// FIXME: Add the extended attributes.343}344345// Add all of the attributes for \param Die to the hash.346void DIEHash::addAttributes(const DIE &Die) {347DIEAttrs Attrs = {};348collectAttributes(Die, Attrs);349hashAttributes(Attrs, Die.getTag());350}351352void DIEHash::hashNestedType(const DIE &Die, StringRef Name) {353// 7.27 Step 7354// ... append the letter 'S',355addULEB128('S');356357// the tag of C,358addULEB128(Die.getTag());359360// and the name.361addString(Name);362}363364// Compute the hash of a DIE. This is based on the type signature computation365// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a366// flattened description of the DIE.367void DIEHash::computeHash(const DIE &Die) {368// Append the letter 'D', followed by the DWARF tag of the DIE.369addULEB128('D');370addULEB128(Die.getTag());371372// Add each of the attributes of the DIE.373addAttributes(Die);374375// Then hash each of the children of the DIE.376for (const auto &C : Die.children()) {377// 7.27 Step 7378// If C is a nested type entry or a member function entry, ...379if (isType(C.getTag()) || (C.getTag() == dwarf::DW_TAG_subprogram && isType(C.getParent()->getTag()))) {380StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);381// ... 
and has a DW_AT_name attribute382if (!Name.empty()) {383hashNestedType(C, Name);384continue;385}386}387computeHash(C);388}389390// Following the last (or if there are no children), append a zero byte.391Hash.update(ArrayRef((uint8_t)'\0'));392}393394/// This is based on the type signature computation given in section 7.27 of the395/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE396/// with the inclusion of the full CU and all top level CU entities.397// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.398uint64_t DIEHash::computeCUSignature(StringRef DWOName, const DIE &Die) {399Numbering.clear();400Numbering[&Die] = 1;401402if (!DWOName.empty())403Hash.update(DWOName);404// Hash the DIE.405computeHash(Die);406407// Now return the result.408MD5::MD5Result Result;409Hash.final(Result);410411// ... take the least significant 8 bytes and return those. Our MD5412// implementation always returns its results in little endian, so we actually413// need the "high" word.414return Result.high();415}416417/// This is based on the type signature computation given in section 7.27 of the418/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE419/// with the inclusion of additional forms not specifically called out in the420/// standard.421uint64_t DIEHash::computeTypeSignature(const DIE &Die) {422Numbering.clear();423Numbering[&Die] = 1;424425if (const DIE *Parent = Die.getParent())426addParentContext(*Parent);427428// Hash the DIE.429computeHash(Die);430431// Now return the result.432MD5::MD5Result Result;433Hash.final(Result);434435// ... take the least significant 8 bytes and return those. Our MD5436// implementation always returns its results in little endian, so we actually437// need the "high" word.438return Result.high();439}440441442