Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/IRNormalizer.cpp
213799 views
//===--------------- IRNormalizer.cpp - IR Normalizer ---------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7/// \file8/// This file implements the IRNormalizer class which aims to transform LLVM9/// Modules into a normal form by reordering and renaming instructions while10/// preserving the same semantics. The normalizer makes it easier to spot11/// semantic differences while diffing two modules which have undergone12/// different passes.13///14//===----------------------------------------------------------------------===//1516#include "llvm/Transforms/Utils/IRNormalizer.h"17#include "llvm/ADT/SetVector.h"18#include "llvm/ADT/SmallPtrSet.h"19#include "llvm/ADT/SmallString.h"20#include "llvm/ADT/SmallVector.h"21#include "llvm/IR/BasicBlock.h"22#include "llvm/IR/Function.h"23#include "llvm/IR/IRBuilder.h"24#include "llvm/IR/InstIterator.h"25#include "llvm/Pass.h"26#include <stack>2728#define DEBUG_TYPE "normalize"2930using namespace llvm;3132namespace {33/// IRNormalizer aims to transform LLVM IR into normal form.34class IRNormalizer {35public:36bool runOnFunction(Function &F);3738IRNormalizer(IRNormalizerOptions Options) : Options(Options) {}3940private:41const IRNormalizerOptions Options;4243// Random constant for hashing, so the state isn't zero.44const uint64_t MagicHashConstant = 0x6acaa36bef8325c5ULL;45DenseSet<const Instruction *> NamedInstructions;4647SmallVector<Instruction *, 16> Outputs;4849/// \name Naming.50/// @{51void nameFunctionArguments(Function &F) const;52void nameBasicBlocks(Function &F) const;53void nameInstruction(Instruction *I);54void nameAsInitialInstruction(Instruction *I) const;55void nameAsRegularInstruction(Instruction *I);56void foldInstructionName(Instruction *I) const;57/// @}5859/// \name Reordering.60/// @{61void reorderInstructions(Function &F) const;62void reorderDefinition(Instruction *Definition,63std::stack<Instruction *> &TopologicalSort,64SmallPtrSet<const Instruction *, 32> &Visited) const;65void reorderInstructionOperandsByNames(Instruction *I) const;66void reorderPHIIncomingValues(PHINode *Phi) const;67/// @}6869/// \name Utility methods.70/// @{71template <typename T>72void sortCommutativeOperands(Instruction *I, T &Operands) const;73SmallVector<Instruction *, 16> collectOutputInstructions(Function &F) const;74bool isOutput(const Instruction *I) const;75bool isInitialInstruction(const Instruction *I) const;76bool hasOnlyImmediateOperands(const Instruction *I) const;77SetVector<int>78getOutputFootprint(Instruction *I,79SmallPtrSet<const Instruction *, 32> &Visited) const;80/// @}81};82} // namespace8384/// Entry method to the IRNormalizer.85///86/// \param F Function to normalize.87bool IRNormalizer::runOnFunction(Function &F) {88nameFunctionArguments(F);89nameBasicBlocks(F);9091Outputs = collectOutputInstructions(F);9293if (!Options.PreserveOrder)94reorderInstructions(F);9596// TODO: Reorder basic blocks via a topological sort.9798for (auto &I : Outputs)99nameInstruction(I);100101for (auto &I : instructions(F)) {102if (!Options.PreserveOrder) {103if (Options.ReorderOperands)104reorderInstructionOperandsByNames(&I);105106if (auto *Phi = dyn_cast<PHINode>(&I))107reorderPHIIncomingValues(Phi);108}109foldInstructionName(&I);110}111112return true;113}114115/// Numbers arguments.116///117/// \param F Function whose arguments will be renamed.118void IRNormalizer::nameFunctionArguments(Function &F) const {119int ArgumentCounter = 0;120for (auto &A : F.args()) {121if (Options.RenameAll || A.getName().empty()) {122A.setName("a" + Twine(ArgumentCounter));123ArgumentCounter += 1;124}125}126}127128/// Names basic blocks using a generated hash for each basic block in129/// a function considering the opcode and the order of output instructions.130///131/// \param F Function containing basic blocks to rename.132void IRNormalizer::nameBasicBlocks(Function &F) const {133for (auto &B : F) {134// Initialize to a magic constant, so the state isn't zero.135uint64_t Hash = MagicHashConstant;136137// Hash considering output instruction opcodes.138for (auto &I : B)139if (isOutput(&I))140Hash = hashing::detail::hash_16_bytes(Hash, I.getOpcode());141142if (Options.RenameAll || B.getName().empty()) {143// Name basic block. Substring hash to make diffs more readable.144B.setName("bb" + std::to_string(Hash).substr(0, 5));145}146}147}148149/// Names instructions graphically (recursive) in accordance with the150/// def-use tree, starting from the initial instructions (defs), finishing at151/// the output (top-most user) instructions (depth-first).152///153/// \param I Instruction to be renamed.154void IRNormalizer::nameInstruction(Instruction *I) {155// Ensure instructions are not renamed. This is done156// to prevent situation where instructions are used157// before their definition (in phi nodes)158if (NamedInstructions.contains(I))159return;160NamedInstructions.insert(I);161if (isInitialInstruction(I)) {162nameAsInitialInstruction(I);163} else {164// This must be a regular instruction.165nameAsRegularInstruction(I);166}167}168169template <typename T>170void IRNormalizer::sortCommutativeOperands(Instruction *I, T &Operands) const {171if (!(I->isCommutative() && Operands.size() >= 2))172return;173auto CommutativeEnd = Operands.begin();174std::advance(CommutativeEnd, 2);175llvm::sort(Operands.begin(), CommutativeEnd);176}177178/// Names instruction following the scheme:179/// vl00000Callee(Operands)180///181/// Where 00000 is a hash calculated considering instruction's opcode and output182/// footprint. Callee's name is only included when instruction's type is183/// CallInst. In cases where instruction is commutative, operands list is also184/// sorted.185///186/// Renames instruction only when RenameAll flag is raised or instruction is187/// unnamed.188///189/// \see getOutputFootprint()190/// \param I Instruction to be renamed.191void IRNormalizer::nameAsInitialInstruction(Instruction *I) const {192if (I->getType()->isVoidTy())193return;194if (!(I->getName().empty() || Options.RenameAll))195return;196LLVM_DEBUG(dbgs() << "Naming initial instruction: " << *I << "\n");197198// Instruction operands for further sorting.199SmallVector<SmallString<64>, 4> Operands;200201// Collect operands.202for (auto &Op : I->operands()) {203if (!isa<Function>(Op)) {204std::string TextRepresentation;205raw_string_ostream Stream(TextRepresentation);206Op->printAsOperand(Stream, false);207Operands.push_back(StringRef(Stream.str()));208}209}210211sortCommutativeOperands(I, Operands);212213// Initialize to a magic constant, so the state isn't zero.214uint64_t Hash = MagicHashConstant;215216// Consider instruction's opcode in the hash.217Hash = hashing::detail::hash_16_bytes(Hash, I->getOpcode());218219SmallPtrSet<const Instruction *, 32> Visited;220// Get output footprint for I.221SetVector<int> OutputFootprint = getOutputFootprint(I, Visited);222223// Consider output footprint in the hash.224for (const int &Output : OutputFootprint)225Hash = hashing::detail::hash_16_bytes(Hash, Output);226227// Base instruction name.228SmallString<256> Name;229Name.append("vl" + std::to_string(Hash).substr(0, 5));230231// In case of CallInst, consider callee in the instruction name.232if (const auto *CI = dyn_cast<CallInst>(I)) {233Function *F = CI->getCalledFunction();234235if (F != nullptr)236Name.append(F->getName());237}238239Name.append("(");240for (size_t i = 0; i < Operands.size(); ++i) {241Name.append(Operands[i]);242243if (i < Operands.size() - 1)244Name.append(", ");245}246Name.append(")");247248I->setName(Name);249}250251/// Names instruction following the scheme:252/// op00000Callee(Operands)253///254/// Where 00000 is a hash calculated considering instruction's opcode, its255/// operands' opcodes and order. Callee's name is only included when256/// instruction's type is CallInst. In cases where instruction is commutative,257/// operand list is also sorted.258///259/// Names instructions recursively in accordance with the def-use tree,260/// starting from the initial instructions (defs), finishing at261/// the output (top-most user) instructions (depth-first).262///263/// Renames instruction only when RenameAll flag is raised or instruction is264/// unnamed.265///266/// \see getOutputFootprint()267/// \param I Instruction to be renamed.268void IRNormalizer::nameAsRegularInstruction(Instruction *I) {269LLVM_DEBUG(dbgs() << "Naming regular instruction: " << *I << "\n");270271// Instruction operands for further sorting.272SmallVector<SmallString<128>, 4> Operands;273274// The name of a regular instruction depends275// on the names of its operands. Hence, all276// operands must be named first in the use-def277// walk.278279// Collect operands.280for (auto &Op : I->operands()) {281if (auto *I = dyn_cast<Instruction>(Op)) {282// Walk down the use-def chain.283nameInstruction(I);284Operands.push_back(I->getName());285} else if (!isa<Function>(Op)) {286// This must be an immediate value.287std::string TextRepresentation;288raw_string_ostream Stream(TextRepresentation);289Op->printAsOperand(Stream, false);290Operands.push_back(StringRef(Stream.str()));291}292}293294sortCommutativeOperands(I, Operands);295296// Initialize to a magic constant, so the state isn't zero.297uint64_t Hash = MagicHashConstant;298299// Consider instruction opcode in the hash.300Hash = hashing::detail::hash_16_bytes(Hash, I->getOpcode());301302// Operand opcodes for further sorting (commutative).303SmallVector<int, 4> OperandsOpcodes;304305// Collect operand opcodes for hashing.306for (auto &Op : I->operands())307if (auto *I = dyn_cast<Instruction>(Op))308OperandsOpcodes.push_back(I->getOpcode());309310sortCommutativeOperands(I, OperandsOpcodes);311312// Consider operand opcodes in the hash.313for (const int Code : OperandsOpcodes)314Hash = hashing::detail::hash_16_bytes(Hash, Code);315316// Base instruction name.317SmallString<512> Name;318Name.append("op" + std::to_string(Hash).substr(0, 5));319320// In case of CallInst, consider callee in the instruction name.321if (const auto *CI = dyn_cast<CallInst>(I))322if (const Function *F = CI->getCalledFunction())323Name.append(F->getName());324325Name.append("(");326for (size_t i = 0; i < Operands.size(); ++i) {327Name.append(Operands[i]);328329if (i < Operands.size() - 1)330Name.append(", ");331}332Name.append(")");333334if ((I->getName().empty() || Options.RenameAll) && !I->getType()->isVoidTy())335I->setName(Name);336}337338/// Shortens instruction's name. This method removes called function name from339/// the instruction name and substitutes the call chain with a corresponding340/// list of operands.341///342/// Examples:343/// op00000Callee(op00001Callee(...), vl00000Callee(1, 2), ...) ->344/// op00000(op00001, vl00000, ...) vl00000Callee(1, 2) -> vl00000(1, 2)345///346/// This method omits output instructions and pre-output (instructions directly347/// used by an output instruction) instructions (by default). By default it also348/// does not affect user named instructions.349///350/// \param I Instruction whose name will be folded.351void IRNormalizer::foldInstructionName(Instruction *I) const {352// If this flag is raised, fold all regular353// instructions (including pre-outputs).354if (!Options.FoldPreOutputs) {355// Don't fold if one of the users is an output instruction.356for (auto *U : I->users())357if (auto *IU = dyn_cast<Instruction>(U))358if (isOutput(IU))359return;360}361362// Don't fold if it is an output instruction or has no op prefix.363if (isOutput(I) || !I->getName().starts_with("op"))364return;365366// Instruction operands.367SmallVector<SmallString<64>, 4> Operands;368369for (auto &Op : I->operands()) {370if (const auto *I = dyn_cast<Instruction>(Op)) {371bool HasNormalName =372I->getName().starts_with("op") || I->getName().starts_with("vl");373374Operands.push_back(HasNormalName ? I->getName().substr(0, 7)375: I->getName());376}377}378379sortCommutativeOperands(I, Operands);380381SmallString<256> Name;382Name.append(I->getName().substr(0, 7));383384Name.append("(");385for (size_t i = 0; i < Operands.size(); ++i) {386Name.append(Operands[i]);387388if (i < Operands.size() - 1)389Name.append(", ");390}391Name.append(")");392393I->setName(Name);394}395396/// Reorders instructions by walking up the tree from each operand of an output397/// instruction and reducing the def-use distance.398/// This method assumes that output instructions were collected top-down,399/// otherwise the def-use chain may be broken.400/// This method is a wrapper for recursive reorderInstruction().401///402/// \see reorderInstruction()403void IRNormalizer::reorderInstructions(Function &F) const {404for (auto &BB : F) {405LLVM_DEBUG(dbgs() << "Reordering instructions in basic block: "406<< BB.getName() << "\n");407// Find the source nodes of the DAG of instructions in this basic block.408// Source nodes are instructions that have side effects, are terminators, or409// don't have a parent in the DAG of instructions.410//411// We must iterate from the first to the last instruction otherwise side412// effecting instructions could be reordered.413414std::stack<Instruction *> TopologicalSort;415SmallPtrSet<const Instruction *, 32> Visited;416for (auto &I : BB) {417// First process side effecting and terminating instructions.418if (!(isOutput(&I) || I.isTerminator()))419continue;420LLVM_DEBUG(dbgs() << "\tReordering from source effecting instruction: ";421I.dump());422reorderDefinition(&I, TopologicalSort, Visited);423}424425for (auto &I : BB) {426// Process the remaining instructions.427//428// TODO: Do more a intelligent sorting of these instructions. For example,429// seperate between dead instructinos and instructions used in another430// block. Use properties of the CFG the order instructions that are used431// in another block.432if (Visited.contains(&I))433continue;434LLVM_DEBUG(dbgs() << "\tReordering from source instruction: "; I.dump());435reorderDefinition(&I, TopologicalSort, Visited);436}437438LLVM_DEBUG(dbgs() << "Inserting instructions into: " << BB.getName()439<< "\n");440// Reorder based on the topological sort.441while (!TopologicalSort.empty()) {442auto *Instruction = TopologicalSort.top();443auto FirstNonPHIOrDbgOrAlloca = BB.getFirstNonPHIOrDbgOrAlloca();444if (auto *Call = dyn_cast<CallInst>(&*FirstNonPHIOrDbgOrAlloca)) {445if (Call->getIntrinsicID() ==446Intrinsic::experimental_convergence_entry ||447Call->getIntrinsicID() == Intrinsic::experimental_convergence_loop)448FirstNonPHIOrDbgOrAlloca++;449}450Instruction->moveBefore(FirstNonPHIOrDbgOrAlloca);451TopologicalSort.pop();452}453}454}455456void IRNormalizer::reorderDefinition(457Instruction *Definition, std::stack<Instruction *> &TopologicalSort,458SmallPtrSet<const Instruction *, 32> &Visited) const {459if (Visited.contains(Definition))460return;461Visited.insert(Definition);462463{464const auto *BasicBlock = Definition->getParent();465const auto FirstNonPHIOrDbgOrAlloca =466BasicBlock->getFirstNonPHIOrDbgOrAlloca();467if (FirstNonPHIOrDbgOrAlloca == BasicBlock->end())468return; // TODO: Is this necessary?469if (Definition->comesBefore(&*FirstNonPHIOrDbgOrAlloca))470return; // TODO: Do some kind of ordering for these instructions.471}472473for (auto &Operand : Definition->operands()) {474if (auto *Op = dyn_cast<Instruction>(Operand)) {475if (Op->getParent() != Definition->getParent())476continue; // Only reorder instruction within the same basic block477reorderDefinition(Op, TopologicalSort, Visited);478}479}480481LLVM_DEBUG(dbgs() << "\t\tNext in topological sort: "; Definition->dump());482if (Definition->isTerminator())483return;484if (auto *Call = dyn_cast<CallInst>(Definition)) {485if (Call->isMustTailCall())486return;487if (Call->getIntrinsicID() == Intrinsic::experimental_deoptimize)488return;489if (Call->getIntrinsicID() == Intrinsic::experimental_convergence_entry)490return;491if (Call->getIntrinsicID() == Intrinsic::experimental_convergence_loop)492return;493}494if (auto *BitCast = dyn_cast<BitCastInst>(Definition)) {495if (auto *Call = dyn_cast<CallInst>(BitCast->getOperand(0))) {496if (Call->isMustTailCall())497return;498}499}500501TopologicalSort.emplace(Definition);502}503504/// Reorders instruction's operands alphabetically. This method assumes505/// that passed instruction is commutative. Changing the operand order506/// in other instructions may change the semantics.507///508/// \param I Instruction whose operands will be reordered.509void IRNormalizer::reorderInstructionOperandsByNames(Instruction *I) const {510// This method assumes that passed I is commutative,511// changing the order of operands in other instructions512// may change the semantics.513514// Instruction operands for further sorting.515SmallVector<std::pair<std::string, Value *>, 4> Operands;516517// Collect operands.518for (auto &Op : I->operands()) {519if (auto *V = dyn_cast<Value>(Op)) {520if (isa<Instruction>(V)) {521// This is an an instruction.522Operands.push_back(std::pair<std::string, Value *>(V->getName(), V));523} else {524std::string TextRepresentation;525raw_string_ostream Stream(TextRepresentation);526Op->printAsOperand(Stream, false);527Operands.push_back(std::pair<std::string, Value *>(Stream.str(), V));528}529}530}531532// Sort operands.533sortCommutativeOperands(I, Operands);534535// Reorder operands.536unsigned Position = 0;537for (auto &Op : I->operands()) {538Op.set(Operands[Position].second);539Position += 1;540}541}542543/// Reorders PHI node's values according to the names of corresponding basic544/// blocks.545///546/// \param Phi PHI node to normalize.547void IRNormalizer::reorderPHIIncomingValues(PHINode *Phi) const {548// Values for further sorting.549SmallVector<std::pair<Value *, BasicBlock *>, 2> Values;550551// Collect blocks and corresponding values.552for (auto &BB : Phi->blocks()) {553Value *V = Phi->getIncomingValueForBlock(BB);554Values.push_back(std::pair<Value *, BasicBlock *>(V, BB));555}556557// Sort values according to the name of a basic block.558llvm::sort(Values, [](const std::pair<Value *, BasicBlock *> &LHS,559const std::pair<Value *, BasicBlock *> &RHS) {560return LHS.second->getName() < RHS.second->getName();561});562563// Swap.564for (unsigned i = 0; i < Values.size(); ++i) {565Phi->setIncomingBlock(i, Values[i].second);566Phi->setIncomingValue(i, Values[i].first);567}568}569570/// Returns a vector of output instructions. An output is an instruction which571/// has side-effects or is ReturnInst. Uses isOutput().572///573/// \see isOutput()574/// \param F Function to collect outputs from.575SmallVector<Instruction *, 16>576IRNormalizer::collectOutputInstructions(Function &F) const {577// Output instructions are collected top-down in each function,578// any change may break the def-use chain in reordering methods.579SmallVector<Instruction *, 16> Outputs;580for (auto &I : instructions(F))581if (isOutput(&I))582Outputs.push_back(&I);583return Outputs;584}585586/// Helper method checking whether the instruction may have side effects or is587/// ReturnInst.588///589/// \param I Considered instruction.590bool IRNormalizer::isOutput(const Instruction *I) const {591// Outputs are such instructions which may have side effects or is ReturnInst.592return I->mayHaveSideEffects() || isa<ReturnInst>(I);593}594595/// Helper method checking whether the instruction has users and only596/// immediate operands.597///598/// \param I Considered instruction.599bool IRNormalizer::isInitialInstruction(const Instruction *I) const {600// Initial instructions are such instructions whose values are used by601// other instructions, yet they only depend on immediate values.602return !I->user_empty() && hasOnlyImmediateOperands(I);603}604605/// Helper method checking whether the instruction has only immediate operands.606///607/// \param I Considered instruction.608bool IRNormalizer::hasOnlyImmediateOperands(const Instruction *I) const {609for (const auto &Op : I->operands())610if (isa<Instruction>(Op))611return false; // Found non-immediate operand (instruction).612return true;613}614615/// Helper method returning indices (distance from the beginning of the basic616/// block) of outputs using the \p I (eliminates repetitions). Walks down the617/// def-use tree recursively.618///619/// \param I Considered instruction.620/// \param Visited Set of visited instructions.621SetVector<int> IRNormalizer::getOutputFootprint(622Instruction *I, SmallPtrSet<const Instruction *, 32> &Visited) const {623624// Vector containing indexes of outputs (no repetitions),625// which use I in the order of walking down the def-use tree.626SetVector<int> Outputs;627628if (!Visited.count(I)) {629Visited.insert(I);630631if (isOutput(I)) {632// Gets output instruction's parent function.633Function *Func = I->getParent()->getParent();634635// Finds and inserts the index of the output to the vector.636unsigned Count = 0;637for (const auto &B : *Func) {638for (const auto &E : B) {639if (&E == I)640Outputs.insert(Count);641Count += 1;642}643}644645// Returns to the used instruction.646return Outputs;647}648649for (auto *U : I->users()) {650if (auto *UI = dyn_cast<Instruction>(U)) {651// Vector for outputs which use UI.652SetVector<int> OutputsUsingUI = getOutputFootprint(UI, Visited);653// Insert the indexes of outputs using UI.654Outputs.insert_range(OutputsUsingUI);655}656}657}658659// Return to the used instruction.660return Outputs;661}662663PreservedAnalyses IRNormalizerPass::run(Function &F,664FunctionAnalysisManager &AM) const {665IRNormalizer(Options).runOnFunction(F);666PreservedAnalyses PA;667PA.preserveSet<CFGAnalyses>();668return PA;669}670671672