Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
35266 views
//===- InstCombinePHI.cpp -------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements the visitPHINode function.9//10//===----------------------------------------------------------------------===//1112#include "InstCombineInternal.h"13#include "llvm/ADT/STLExtras.h"14#include "llvm/ADT/SmallPtrSet.h"15#include "llvm/ADT/Statistic.h"16#include "llvm/Analysis/InstructionSimplify.h"17#include "llvm/Analysis/ValueTracking.h"18#include "llvm/IR/PatternMatch.h"19#include "llvm/Support/CommandLine.h"20#include "llvm/Transforms/InstCombine/InstCombiner.h"21#include "llvm/Transforms/Utils/Local.h"22#include <optional>2324using namespace llvm;25using namespace llvm::PatternMatch;2627#define DEBUG_TYPE "instcombine"2829static cl::opt<unsigned>30MaxNumPhis("instcombine-max-num-phis", cl::init(512),31cl::desc("Maximum number phis to handle in intptr/ptrint folding"));3233STATISTIC(NumPHIsOfInsertValues,34"Number of phi-of-insertvalue turned into insertvalue-of-phis");35STATISTIC(NumPHIsOfExtractValues,36"Number of phi-of-extractvalue turned into extractvalue-of-phi");37STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd");3839/// The PHI arguments will be folded into a single operation with a PHI node40/// as input. The debug location of the single operation will be the merged41/// locations of the original PHI node arguments.42void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) {43auto *FirstInst = cast<Instruction>(PN.getIncomingValue(0));44Inst->setDebugLoc(FirstInst->getDebugLoc());45// We do not expect a CallInst here, otherwise, N-way merging of DebugLoc46// will be inefficient.47assert(!isa<CallInst>(Inst));4849for (Value *V : drop_begin(PN.incoming_values())) {50auto *I = cast<Instruction>(V);51Inst->applyMergedLocation(Inst->getDebugLoc(), I->getDebugLoc());52}53}5455// Replace Integer typed PHI PN if the PHI's value is used as a pointer value.56// If there is an existing pointer typed PHI that produces the same value as PN,57// replace PN and the IntToPtr operation with it. Otherwise, synthesize a new58// PHI node:59//60// Case-1:61// bb1:62// int_init = PtrToInt(ptr_init)63// br label %bb264// bb2:65// int_val = PHI([int_init, %bb1], [int_val_inc, %bb2]66// ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2]67// ptr_val2 = IntToPtr(int_val)68// ...69// use(ptr_val2)70// ptr_val_inc = ...71// inc_val_inc = PtrToInt(ptr_val_inc)72//73// ==>74// bb1:75// br label %bb276// bb2:77// ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2]78// ...79// use(ptr_val)80// ptr_val_inc = ...81//82// Case-2:83// bb1:84// int_ptr = BitCast(ptr_ptr)85// int_init = Load(int_ptr)86// br label %bb287// bb2:88// int_val = PHI([int_init, %bb1], [int_val_inc, %bb2]89// ptr_val2 = IntToPtr(int_val)90// ...91// use(ptr_val2)92// ptr_val_inc = ...93// inc_val_inc = PtrToInt(ptr_val_inc)94// ==>95// bb1:96// ptr_init = Load(ptr_ptr)97// br label %bb298// bb2:99// ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2]100// ...101// use(ptr_val)102// ptr_val_inc = ...103// ...104//105bool InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {106if (!PN.getType()->isIntegerTy())107return false;108if (!PN.hasOneUse())109return false;110111auto *IntToPtr = dyn_cast<IntToPtrInst>(PN.user_back());112if (!IntToPtr)113return false;114115// Check if the pointer is actually used as pointer:116auto HasPointerUse = [](Instruction *IIP) {117for (User *U : IIP->users()) {118Value *Ptr = nullptr;119if (LoadInst *LoadI = dyn_cast<LoadInst>(U)) {120Ptr = LoadI->getPointerOperand();121} else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {122Ptr = SI->getPointerOperand();123} else if (GetElementPtrInst *GI = dyn_cast<GetElementPtrInst>(U)) {124Ptr = GI->getPointerOperand();125}126127if (Ptr && Ptr == IIP)128return true;129}130return false;131};132133if (!HasPointerUse(IntToPtr))134return false;135136if (DL.getPointerSizeInBits(IntToPtr->getAddressSpace()) !=137DL.getTypeSizeInBits(IntToPtr->getOperand(0)->getType()))138return false;139140SmallVector<Value *, 4> AvailablePtrVals;141for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) {142BasicBlock *BB = std::get<0>(Incoming);143Value *Arg = std::get<1>(Incoming);144145// First look backward:146if (auto *PI = dyn_cast<PtrToIntInst>(Arg)) {147AvailablePtrVals.emplace_back(PI->getOperand(0));148continue;149}150151// Next look forward:152Value *ArgIntToPtr = nullptr;153for (User *U : Arg->users()) {154if (isa<IntToPtrInst>(U) && U->getType() == IntToPtr->getType() &&155(DT.dominates(cast<Instruction>(U), BB) ||156cast<Instruction>(U)->getParent() == BB)) {157ArgIntToPtr = U;158break;159}160}161162if (ArgIntToPtr) {163AvailablePtrVals.emplace_back(ArgIntToPtr);164continue;165}166167// If Arg is defined by a PHI, allow it. This will also create168// more opportunities iteratively.169if (isa<PHINode>(Arg)) {170AvailablePtrVals.emplace_back(Arg);171continue;172}173174// For a single use integer load:175auto *LoadI = dyn_cast<LoadInst>(Arg);176if (!LoadI)177return false;178179if (!LoadI->hasOneUse())180return false;181182// Push the integer typed Load instruction into the available183// value set, and fix it up later when the pointer typed PHI184// is synthesized.185AvailablePtrVals.emplace_back(LoadI);186}187188// Now search for a matching PHI189auto *BB = PN.getParent();190assert(AvailablePtrVals.size() == PN.getNumIncomingValues() &&191"Not enough available ptr typed incoming values");192PHINode *MatchingPtrPHI = nullptr;193unsigned NumPhis = 0;194for (PHINode &PtrPHI : BB->phis()) {195// FIXME: consider handling this in AggressiveInstCombine196if (NumPhis++ > MaxNumPhis)197return false;198if (&PtrPHI == &PN || PtrPHI.getType() != IntToPtr->getType())199continue;200if (any_of(zip(PN.blocks(), AvailablePtrVals),201[&](const auto &BlockAndValue) {202BasicBlock *BB = std::get<0>(BlockAndValue);203Value *V = std::get<1>(BlockAndValue);204return PtrPHI.getIncomingValueForBlock(BB) != V;205}))206continue;207MatchingPtrPHI = &PtrPHI;208break;209}210211if (MatchingPtrPHI) {212assert(MatchingPtrPHI->getType() == IntToPtr->getType() &&213"Phi's Type does not match with IntToPtr");214// Explicitly replace the inttoptr (rather than inserting a ptrtoint) here,215// to make sure another transform can't undo it in the meantime.216replaceInstUsesWith(*IntToPtr, MatchingPtrPHI);217eraseInstFromFunction(*IntToPtr);218eraseInstFromFunction(PN);219return true;220}221222// If it requires a conversion for every PHI operand, do not do it.223if (all_of(AvailablePtrVals, [&](Value *V) {224return (V->getType() != IntToPtr->getType()) || isa<IntToPtrInst>(V);225}))226return false;227228// If any of the operand that requires casting is a terminator229// instruction, do not do it. Similarly, do not do the transform if the value230// is PHI in a block with no insertion point, for example, a catchswitch231// block, since we will not be able to insert a cast after the PHI.232if (any_of(AvailablePtrVals, [&](Value *V) {233if (V->getType() == IntToPtr->getType())234return false;235auto *Inst = dyn_cast<Instruction>(V);236if (!Inst)237return false;238if (Inst->isTerminator())239return true;240auto *BB = Inst->getParent();241if (isa<PHINode>(Inst) && BB->getFirstInsertionPt() == BB->end())242return true;243return false;244}))245return false;246247PHINode *NewPtrPHI = PHINode::Create(248IntToPtr->getType(), PN.getNumIncomingValues(), PN.getName() + ".ptr");249250InsertNewInstBefore(NewPtrPHI, PN.getIterator());251SmallDenseMap<Value *, Instruction *> Casts;252for (auto Incoming : zip(PN.blocks(), AvailablePtrVals)) {253auto *IncomingBB = std::get<0>(Incoming);254auto *IncomingVal = std::get<1>(Incoming);255256if (IncomingVal->getType() == IntToPtr->getType()) {257NewPtrPHI->addIncoming(IncomingVal, IncomingBB);258continue;259}260261#ifndef NDEBUG262LoadInst *LoadI = dyn_cast<LoadInst>(IncomingVal);263assert((isa<PHINode>(IncomingVal) ||264IncomingVal->getType()->isPointerTy() ||265(LoadI && LoadI->hasOneUse())) &&266"Can not replace LoadInst with multiple uses");267#endif268// Need to insert a BitCast.269// For an integer Load instruction with a single use, the load + IntToPtr270// cast will be simplified into a pointer load:271// %v = load i64, i64* %a.ip, align 8272// %v.cast = inttoptr i64 %v to float **273// ==>274// %v.ptrp = bitcast i64 * %a.ip to float **275// %v.cast = load float *, float ** %v.ptrp, align 8276Instruction *&CI = Casts[IncomingVal];277if (!CI) {278CI = CastInst::CreateBitOrPointerCast(IncomingVal, IntToPtr->getType(),279IncomingVal->getName() + ".ptr");280if (auto *IncomingI = dyn_cast<Instruction>(IncomingVal)) {281BasicBlock::iterator InsertPos(IncomingI);282InsertPos++;283BasicBlock *BB = IncomingI->getParent();284if (isa<PHINode>(IncomingI))285InsertPos = BB->getFirstInsertionPt();286assert(InsertPos != BB->end() && "should have checked above");287InsertNewInstBefore(CI, InsertPos);288} else {289auto *InsertBB = &IncomingBB->getParent()->getEntryBlock();290InsertNewInstBefore(CI, InsertBB->getFirstInsertionPt());291}292}293NewPtrPHI->addIncoming(CI, IncomingBB);294}295296// Explicitly replace the inttoptr (rather than inserting a ptrtoint) here,297// to make sure another transform can't undo it in the meantime.298replaceInstUsesWith(*IntToPtr, NewPtrPHI);299eraseInstFromFunction(*IntToPtr);300eraseInstFromFunction(PN);301return true;302}303304// Remove RoundTrip IntToPtr/PtrToInt Cast on PHI-Operand and305// fold Phi-operand to bitcast.306Instruction *InstCombinerImpl::foldPHIArgIntToPtrToPHI(PHINode &PN) {307// convert ptr2int ( phi[ int2ptr(ptr2int(x))] ) --> ptr2int ( phi [ x ] )308// Make sure all uses of phi are ptr2int.309if (!all_of(PN.users(), [](User *U) { return isa<PtrToIntInst>(U); }))310return nullptr;311312// Iterating over all operands to check presence of target pointers for313// optimization.314bool OperandWithRoundTripCast = false;315for (unsigned OpNum = 0; OpNum != PN.getNumIncomingValues(); ++OpNum) {316if (auto *NewOp =317simplifyIntToPtrRoundTripCast(PN.getIncomingValue(OpNum))) {318replaceOperand(PN, OpNum, NewOp);319OperandWithRoundTripCast = true;320}321}322if (!OperandWithRoundTripCast)323return nullptr;324return &PN;325}326327/// If we have something like phi [insertvalue(a,b,0), insertvalue(c,d,0)],328/// turn this into a phi[a,c] and phi[b,d] and a single insertvalue.329Instruction *330InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) {331auto *FirstIVI = cast<InsertValueInst>(PN.getIncomingValue(0));332333// Scan to see if all operands are `insertvalue`'s with the same indices,334// and all have a single use.335for (Value *V : drop_begin(PN.incoming_values())) {336auto *I = dyn_cast<InsertValueInst>(V);337if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices())338return nullptr;339}340341// For each operand of an `insertvalue`342std::array<PHINode *, 2> NewOperands;343for (int OpIdx : {0, 1}) {344auto *&NewOperand = NewOperands[OpIdx];345// Create a new PHI node to receive the values the operand has in each346// incoming basic block.347NewOperand = PHINode::Create(348FirstIVI->getOperand(OpIdx)->getType(), PN.getNumIncomingValues(),349FirstIVI->getOperand(OpIdx)->getName() + ".pn");350// And populate each operand's PHI with said values.351for (auto Incoming : zip(PN.blocks(), PN.incoming_values()))352NewOperand->addIncoming(353cast<InsertValueInst>(std::get<1>(Incoming))->getOperand(OpIdx),354std::get<0>(Incoming));355InsertNewInstBefore(NewOperand, PN.getIterator());356}357358// And finally, create `insertvalue` over the newly-formed PHI nodes.359auto *NewIVI = InsertValueInst::Create(NewOperands[0], NewOperands[1],360FirstIVI->getIndices(), PN.getName());361362PHIArgMergedDebugLoc(NewIVI, PN);363++NumPHIsOfInsertValues;364return NewIVI;365}366367/// If we have something like phi [extractvalue(a,0), extractvalue(b,0)],368/// turn this into a phi[a,b] and a single extractvalue.369Instruction *370InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) {371auto *FirstEVI = cast<ExtractValueInst>(PN.getIncomingValue(0));372373// Scan to see if all operands are `extractvalue`'s with the same indices,374// and all have a single use.375for (Value *V : drop_begin(PN.incoming_values())) {376auto *I = dyn_cast<ExtractValueInst>(V);377if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() ||378I->getAggregateOperand()->getType() !=379FirstEVI->getAggregateOperand()->getType())380return nullptr;381}382383// Create a new PHI node to receive the values the aggregate operand has384// in each incoming basic block.385auto *NewAggregateOperand = PHINode::Create(386FirstEVI->getAggregateOperand()->getType(), PN.getNumIncomingValues(),387FirstEVI->getAggregateOperand()->getName() + ".pn");388// And populate the PHI with said values.389for (auto Incoming : zip(PN.blocks(), PN.incoming_values()))390NewAggregateOperand->addIncoming(391cast<ExtractValueInst>(std::get<1>(Incoming))->getAggregateOperand(),392std::get<0>(Incoming));393InsertNewInstBefore(NewAggregateOperand, PN.getIterator());394395// And finally, create `extractvalue` over the newly-formed PHI nodes.396auto *NewEVI = ExtractValueInst::Create(NewAggregateOperand,397FirstEVI->getIndices(), PN.getName());398399PHIArgMergedDebugLoc(NewEVI, PN);400++NumPHIsOfExtractValues;401return NewEVI;402}403404/// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the405/// adds all have a single user, turn this into a phi and a single binop.406Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {407Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));408assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));409unsigned Opc = FirstInst->getOpcode();410Value *LHSVal = FirstInst->getOperand(0);411Value *RHSVal = FirstInst->getOperand(1);412413Type *LHSType = LHSVal->getType();414Type *RHSType = RHSVal->getType();415416// Scan to see if all operands are the same opcode, and all have one user.417for (Value *V : drop_begin(PN.incoming_values())) {418Instruction *I = dyn_cast<Instruction>(V);419if (!I || I->getOpcode() != Opc || !I->hasOneUser() ||420// Verify type of the LHS matches so we don't fold cmp's of different421// types.422I->getOperand(0)->getType() != LHSType ||423I->getOperand(1)->getType() != RHSType)424return nullptr;425426// If they are CmpInst instructions, check their predicates427if (CmpInst *CI = dyn_cast<CmpInst>(I))428if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())429return nullptr;430431// Keep track of which operand needs a phi node.432if (I->getOperand(0) != LHSVal) LHSVal = nullptr;433if (I->getOperand(1) != RHSVal) RHSVal = nullptr;434}435436// If both LHS and RHS would need a PHI, don't do this transformation,437// because it would increase the number of PHIs entering the block,438// which leads to higher register pressure. This is especially439// bad when the PHIs are in the header of a loop.440if (!LHSVal && !RHSVal)441return nullptr;442443// Otherwise, this is safe to transform!444445Value *InLHS = FirstInst->getOperand(0);446Value *InRHS = FirstInst->getOperand(1);447PHINode *NewLHS = nullptr, *NewRHS = nullptr;448if (!LHSVal) {449NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),450FirstInst->getOperand(0)->getName() + ".pn");451NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));452InsertNewInstBefore(NewLHS, PN.getIterator());453LHSVal = NewLHS;454}455456if (!RHSVal) {457NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),458FirstInst->getOperand(1)->getName() + ".pn");459NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));460InsertNewInstBefore(NewRHS, PN.getIterator());461RHSVal = NewRHS;462}463464// Add all operands to the new PHIs.465if (NewLHS || NewRHS) {466for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {467BasicBlock *InBB = std::get<0>(Incoming);468Value *InVal = std::get<1>(Incoming);469Instruction *InInst = cast<Instruction>(InVal);470if (NewLHS) {471Value *NewInLHS = InInst->getOperand(0);472NewLHS->addIncoming(NewInLHS, InBB);473}474if (NewRHS) {475Value *NewInRHS = InInst->getOperand(1);476NewRHS->addIncoming(NewInRHS, InBB);477}478}479}480481if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {482CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),483LHSVal, RHSVal);484PHIArgMergedDebugLoc(NewCI, PN);485return NewCI;486}487488BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst);489BinaryOperator *NewBinOp =490BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);491492NewBinOp->copyIRFlags(PN.getIncomingValue(0));493494for (Value *V : drop_begin(PN.incoming_values()))495NewBinOp->andIRFlags(V);496497PHIArgMergedDebugLoc(NewBinOp, PN);498return NewBinOp;499}500501Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {502GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));503504SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),505FirstInst->op_end());506// This is true if all GEP bases are allocas and if all indices into them are507// constants.508bool AllBasePointersAreAllocas = true;509510// We don't want to replace this phi if the replacement would require511// more than one phi, which leads to higher register pressure. This is512// especially bad when the PHIs are in the header of a loop.513bool NeededPhi = false;514515// Remember flags of the first phi-operand getelementptr.516GEPNoWrapFlags NW = FirstInst->getNoWrapFlags();517518// Scan to see if all operands are the same opcode, and all have one user.519for (Value *V : drop_begin(PN.incoming_values())) {520GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);521if (!GEP || !GEP->hasOneUser() ||522GEP->getSourceElementType() != FirstInst->getSourceElementType() ||523GEP->getNumOperands() != FirstInst->getNumOperands())524return nullptr;525526NW &= GEP->getNoWrapFlags();527528// Keep track of whether or not all GEPs are of alloca pointers.529if (AllBasePointersAreAllocas &&530(!isa<AllocaInst>(GEP->getOperand(0)) ||531!GEP->hasAllConstantIndices()))532AllBasePointersAreAllocas = false;533534// Compare the operand lists.535for (unsigned Op = 0, E = FirstInst->getNumOperands(); Op != E; ++Op) {536if (FirstInst->getOperand(Op) == GEP->getOperand(Op))537continue;538539// Don't merge two GEPs when two operands differ (introducing phi nodes)540// if one of the PHIs has a constant for the index. The index may be541// substantially cheaper to compute for the constants, so making it a542// variable index could pessimize the path. This also handles the case543// for struct indices, which must always be constant.544if (isa<ConstantInt>(FirstInst->getOperand(Op)) ||545isa<ConstantInt>(GEP->getOperand(Op)))546return nullptr;547548if (FirstInst->getOperand(Op)->getType() !=549GEP->getOperand(Op)->getType())550return nullptr;551552// If we already needed a PHI for an earlier operand, and another operand553// also requires a PHI, we'd be introducing more PHIs than we're554// eliminating, which increases register pressure on entry to the PHI's555// block.556if (NeededPhi)557return nullptr;558559FixedOperands[Op] = nullptr; // Needs a PHI.560NeededPhi = true;561}562}563564// If all of the base pointers of the PHI'd GEPs are from allocas, don't565// bother doing this transformation. At best, this will just save a bit of566// offset calculation, but all the predecessors will have to materialize the567// stack address into a register anyway. We'd actually rather *clone* the568// load up into the predecessors so that we have a load of a gep of an alloca,569// which can usually all be folded into the load.570if (AllBasePointersAreAllocas)571return nullptr;572573// Otherwise, this is safe to transform. Insert PHI nodes for each operand574// that is variable.575SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());576577bool HasAnyPHIs = false;578for (unsigned I = 0, E = FixedOperands.size(); I != E; ++I) {579if (FixedOperands[I])580continue; // operand doesn't need a phi.581Value *FirstOp = FirstInst->getOperand(I);582PHINode *NewPN =583PHINode::Create(FirstOp->getType(), E, FirstOp->getName() + ".pn");584InsertNewInstBefore(NewPN, PN.getIterator());585586NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));587OperandPhis[I] = NewPN;588FixedOperands[I] = NewPN;589HasAnyPHIs = true;590}591592// Add all operands to the new PHIs.593if (HasAnyPHIs) {594for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {595BasicBlock *InBB = std::get<0>(Incoming);596Value *InVal = std::get<1>(Incoming);597GetElementPtrInst *InGEP = cast<GetElementPtrInst>(InVal);598599for (unsigned Op = 0, E = OperandPhis.size(); Op != E; ++Op)600if (PHINode *OpPhi = OperandPhis[Op])601OpPhi->addIncoming(InGEP->getOperand(Op), InBB);602}603}604605Value *Base = FixedOperands[0];606GetElementPtrInst *NewGEP =607GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base,608ArrayRef(FixedOperands).slice(1), NW);609PHIArgMergedDebugLoc(NewGEP, PN);610return NewGEP;611}612613/// Return true if we know that it is safe to sink the load out of the block614/// that defines it. This means that it must be obvious the value of the load is615/// not changed from the point of the load to the end of the block it is in.616///617/// Finally, it is safe, but not profitable, to sink a load targeting a618/// non-address-taken alloca. Doing so will cause us to not promote the alloca619/// to a register.620static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {621BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();622623for (++BBI; BBI != E; ++BBI)624if (BBI->mayWriteToMemory()) {625// Calls that only access inaccessible memory do not block sinking the626// load.627if (auto *CB = dyn_cast<CallBase>(BBI))628if (CB->onlyAccessesInaccessibleMemory())629continue;630return false;631}632633// Check for non-address taken alloca. If not address-taken already, it isn't634// profitable to do this xform.635if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {636bool IsAddressTaken = false;637for (User *U : AI->users()) {638if (isa<LoadInst>(U)) continue;639if (StoreInst *SI = dyn_cast<StoreInst>(U)) {640// If storing TO the alloca, then the address isn't taken.641if (SI->getOperand(1) == AI) continue;642}643IsAddressTaken = true;644break;645}646647if (!IsAddressTaken && AI->isStaticAlloca())648return false;649}650651// If this load is a load from a GEP with a constant offset from an alloca,652// then we don't want to sink it. In its present form, it will be653// load [constant stack offset]. Sinking it will cause us to have to654// materialize the stack addresses in each predecessor in a register only to655// do a shared load from register in the successor.656if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))657if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))658if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())659return false;660661return true;662}663664Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {665LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));666667// Can't forward swifterror through a phi.668if (FirstLI->getOperand(0)->isSwiftError())669return nullptr;670671// FIXME: This is overconservative; this transform is allowed in some cases672// for atomic operations.673if (FirstLI->isAtomic())674return nullptr;675676// When processing loads, we need to propagate two bits of information to the677// sunk load: whether it is volatile, and what its alignment is.678bool IsVolatile = FirstLI->isVolatile();679Align LoadAlignment = FirstLI->getAlign();680const unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();681682// We can't sink the load if the loaded value could be modified between the683// load and the PHI.684if (FirstLI->getParent() != PN.getIncomingBlock(0) ||685!isSafeAndProfitableToSinkLoad(FirstLI))686return nullptr;687688// If the PHI is of volatile loads and the load block has multiple689// successors, sinking it would remove a load of the volatile value from690// the path through the other successor.691if (IsVolatile &&692FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)693return nullptr;694695for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {696BasicBlock *InBB = std::get<0>(Incoming);697Value *InVal = std::get<1>(Incoming);698LoadInst *LI = dyn_cast<LoadInst>(InVal);699if (!LI || !LI->hasOneUser() || LI->isAtomic())700return nullptr;701702// Make sure all arguments are the same type of operation.703if (LI->isVolatile() != IsVolatile ||704LI->getPointerAddressSpace() != LoadAddrSpace)705return nullptr;706707// Can't forward swifterror through a phi.708if (LI->getOperand(0)->isSwiftError())709return nullptr;710711// We can't sink the load if the loaded value could be modified between712// the load and the PHI.713if (LI->getParent() != InBB || !isSafeAndProfitableToSinkLoad(LI))714return nullptr;715716LoadAlignment = std::min(LoadAlignment, LI->getAlign());717718// If the PHI is of volatile loads and the load block has multiple719// successors, sinking it would remove a load of the volatile value from720// the path through the other successor.721if (IsVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1)722return nullptr;723}724725// Okay, they are all the same operation. Create a new PHI node of the726// correct type, and PHI together all of the LHS's of the instructions.727PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),728PN.getNumIncomingValues(),729PN.getName()+".in");730731Value *InVal = FirstLI->getOperand(0);732NewPN->addIncoming(InVal, PN.getIncomingBlock(0));733LoadInst *NewLI =734new LoadInst(FirstLI->getType(), NewPN, "", IsVolatile, LoadAlignment);735736unsigned KnownIDs[] = {737LLVMContext::MD_tbaa,738LLVMContext::MD_range,739LLVMContext::MD_invariant_load,740LLVMContext::MD_alias_scope,741LLVMContext::MD_noalias,742LLVMContext::MD_nonnull,743LLVMContext::MD_align,744LLVMContext::MD_dereferenceable,745LLVMContext::MD_dereferenceable_or_null,746LLVMContext::MD_access_group,747LLVMContext::MD_noundef,748};749750for (unsigned ID : KnownIDs)751NewLI->setMetadata(ID, FirstLI->getMetadata(ID));752753// Add all operands to the new PHI and combine TBAA metadata.754for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {755BasicBlock *BB = std::get<0>(Incoming);756Value *V = std::get<1>(Incoming);757LoadInst *LI = cast<LoadInst>(V);758combineMetadata(NewLI, LI, KnownIDs, true);759Value *NewInVal = LI->getOperand(0);760if (NewInVal != InVal)761InVal = nullptr;762NewPN->addIncoming(NewInVal, BB);763}764765if (InVal) {766// The new PHI unions all of the same values together. This is really767// common, so we handle it intelligently here for compile-time speed.768NewLI->setOperand(0, InVal);769delete NewPN;770} else {771InsertNewInstBefore(NewPN, PN.getIterator());772}773774// If this was a volatile load that we are merging, make sure to loop through775// and mark all the input loads as non-volatile. If we don't do this, we will776// insert a new volatile load and the old ones will not be deletable.777if (IsVolatile)778for (Value *IncValue : PN.incoming_values())779cast<LoadInst>(IncValue)->setVolatile(false);780781PHIArgMergedDebugLoc(NewLI, PN);782return NewLI;783}784785/// TODO: This function could handle other cast types, but then it might786/// require special-casing a cast from the 'i1' type. See the comment in787/// FoldPHIArgOpIntoPHI() about pessimizing illegal integer types.788Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) {789// We cannot create a new instruction after the PHI if the terminator is an790// EHPad because there is no valid insertion point.791if (Instruction *TI = Phi.getParent()->getTerminator())792if (TI->isEHPad())793return nullptr;794795// Early exit for the common case of a phi with two operands. These are796// handled elsewhere. See the comment below where we check the count of zexts797// and constants for more details.798unsigned NumIncomingValues = Phi.getNumIncomingValues();799if (NumIncomingValues < 3)800return nullptr;801802// Find the narrower type specified by the first zext.803Type *NarrowType = nullptr;804for (Value *V : Phi.incoming_values()) {805if (auto *Zext = dyn_cast<ZExtInst>(V)) {806NarrowType = Zext->getSrcTy();807break;808}809}810if (!NarrowType)811return nullptr;812813// Walk the phi operands checking that we only have zexts or constants that814// we can shrink for free. Store the new operands for the new phi.815SmallVector<Value *, 4> NewIncoming;816unsigned NumZexts = 0;817unsigned NumConsts = 0;818for (Value *V : Phi.incoming_values()) {819if (auto *Zext = dyn_cast<ZExtInst>(V)) {820// All zexts must be identical and have one user.821if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUser())822return nullptr;823NewIncoming.push_back(Zext->getOperand(0));824NumZexts++;825} else if (auto *C = dyn_cast<Constant>(V)) {826// Make sure that constants can fit in the new type.827Constant *Trunc = getLosslessUnsignedTrunc(C, NarrowType);828if (!Trunc)829return nullptr;830NewIncoming.push_back(Trunc);831NumConsts++;832} else {833// If it's not a cast or a constant, bail out.834return nullptr;835}836}837838// The more common cases of a phi with no constant operands or just one839// variable operand are handled by FoldPHIArgOpIntoPHI() and foldOpIntoPhi()840// respectively. foldOpIntoPhi() wants to do the opposite transform that is841// performed here. It tries to replicate a cast in the phi operand's basic842// block to expose other folding opportunities. Thus, InstCombine will843// infinite loop without this check.844if (NumConsts == 0 || NumZexts < 2)845return nullptr;846847// All incoming values are zexts or constants that are safe to truncate.848// Create a new phi node of the narrow type, phi together all of the new849// operands, and zext the result back to the original type.850PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues,851Phi.getName() + ".shrunk");852for (unsigned I = 0; I != NumIncomingValues; ++I)853NewPhi->addIncoming(NewIncoming[I], Phi.getIncomingBlock(I));854855InsertNewInstBefore(NewPhi, Phi.getIterator());856return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType());857}858859/// If all operands to a PHI node are the same "unary" operator and they all are860/// only used by the PHI, PHI together their inputs, and do the operation once,861/// to the result of the PHI.862Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {863// We cannot create a new instruction after the PHI if the terminator is an864// EHPad because there is no valid insertion point.865if (Instruction *TI = PN.getParent()->getTerminator())866if (TI->isEHPad())867return nullptr;868869Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));870871if (isa<GetElementPtrInst>(FirstInst))872return foldPHIArgGEPIntoPHI(PN);873if (isa<LoadInst>(FirstInst))874return foldPHIArgLoadIntoPHI(PN);875if (isa<InsertValueInst>(FirstInst))876return foldPHIArgInsertValueInstructionIntoPHI(PN);877if (isa<ExtractValueInst>(FirstInst))878return foldPHIArgExtractValueInstructionIntoPHI(PN);879880// Scan the instruction, looking for input operations that can be folded away.881// If all input operands to the phi are the same instruction (e.g. a cast from882// the same type or "+42") we can pull the operation through the PHI, reducing883// code size and simplifying code.884Constant *ConstantOp = nullptr;885Type *CastSrcTy = nullptr;886887if (isa<CastInst>(FirstInst)) {888CastSrcTy = FirstInst->getOperand(0)->getType();889890// Be careful about transforming integer PHIs. We don't want to pessimize891// the code by turning an i32 into an i1293.892if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {893if (!shouldChangeType(PN.getType(), CastSrcTy))894return nullptr;895}896} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {897// Can fold binop, compare or shift here if the RHS is a constant,898// otherwise call FoldPHIArgBinOpIntoPHI.899ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));900if (!ConstantOp)901return foldPHIArgBinOpIntoPHI(PN);902} else {903return nullptr; // Cannot fold this operation.904}905906// Check to see if all arguments are the same operation.907for (Value *V : drop_begin(PN.incoming_values())) {908Instruction *I = dyn_cast<Instruction>(V);909if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst))910return nullptr;911if (CastSrcTy) {912if (I->getOperand(0)->getType() != CastSrcTy)913return nullptr; // Cast operation must match.914} else if (I->getOperand(1) != ConstantOp) {915return nullptr;916}917}918919// Okay, they are all the same operation. Create a new PHI node of the920// correct type, and PHI together all of the LHS's of the instructions.921PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),922PN.getNumIncomingValues(),923PN.getName()+".in");924925Value *InVal = FirstInst->getOperand(0);926NewPN->addIncoming(InVal, PN.getIncomingBlock(0));927928// Add all operands to the new PHI.929for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {930BasicBlock *BB = std::get<0>(Incoming);931Value *V = std::get<1>(Incoming);932Value *NewInVal = cast<Instruction>(V)->getOperand(0);933if (NewInVal != InVal)934InVal = nullptr;935NewPN->addIncoming(NewInVal, BB);936}937938Value *PhiVal;939if (InVal) {940// The new PHI unions all of the same values together. This is really941// common, so we handle it intelligently here for compile-time speed.942PhiVal = InVal;943delete NewPN;944} else {945InsertNewInstBefore(NewPN, PN.getIterator());946PhiVal = NewPN;947}948949// Insert and return the new operation.950if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) {951CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal,952PN.getType());953PHIArgMergedDebugLoc(NewCI, PN);954return NewCI;955}956957if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {958BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);959BinOp->copyIRFlags(PN.getIncomingValue(0));960961for (Value *V : drop_begin(PN.incoming_values()))962BinOp->andIRFlags(V);963964PHIArgMergedDebugLoc(BinOp, PN);965return BinOp;966}967968CmpInst *CIOp = cast<CmpInst>(FirstInst);969CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),970PhiVal, ConstantOp);971PHIArgMergedDebugLoc(NewCI, PN);972return NewCI;973}974975/// Return true if this PHI node is only used by a PHI node cycle that is dead.976static bool isDeadPHICycle(PHINode *PN,977SmallPtrSetImpl<PHINode *> &PotentiallyDeadPHIs) {978if (PN->use_empty()) return true;979if (!PN->hasOneUse()) return false;980981// Remember this node, and if we find the cycle, return.982if (!PotentiallyDeadPHIs.insert(PN).second)983return true;984985// Don't scan crazily complex things.986if (PotentiallyDeadPHIs.size() == 16)987return false;988989if (PHINode *PU = dyn_cast<PHINode>(PN->user_back()))990return isDeadPHICycle(PU, PotentiallyDeadPHIs);991992return false;993}994995/// Return true if this phi node is always equal to NonPhiInVal.996/// This happens with mutually cyclic phi nodes like:997/// z = some value; x = phi (y, z); y = phi (x, z)998static bool PHIsEqualValue(PHINode *PN, Value *&NonPhiInVal,999SmallPtrSetImpl<PHINode *> &ValueEqualPHIs) {1000// See if we already saw this PHI node.1001if (!ValueEqualPHIs.insert(PN).second)1002return true;10031004// Don't scan crazily complex things.1005if (ValueEqualPHIs.size() == 16)1006return false;10071008// Scan the operands to see if they are either phi nodes or are equal to1009// the value.1010for (Value *Op : PN->incoming_values()) {1011if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {1012if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) {1013if (NonPhiInVal)1014return false;1015NonPhiInVal = OpPN;1016}1017} else if (Op != NonPhiInVal)1018return false;1019}10201021return true;1022}10231024/// Return an existing non-zero constant if this phi node has one, otherwise1025/// return constant 1.1026static ConstantInt *getAnyNonZeroConstInt(PHINode &PN) {1027assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi");1028for (Value *V : PN.operands())1029if (auto *ConstVA = dyn_cast<ConstantInt>(V))1030if (!ConstVA->isZero())1031return ConstVA;1032return ConstantInt::get(cast<IntegerType>(PN.getType()), 1);1033}10341035namespace {1036struct PHIUsageRecord {1037unsigned PHIId; // The ID # of the PHI (something determinstic to sort on)1038unsigned Shift; // The amount shifted.1039Instruction *Inst; // The trunc instruction.10401041PHIUsageRecord(unsigned Pn, unsigned Sh, Instruction *User)1042: PHIId(Pn), Shift(Sh), Inst(User) {}10431044bool operator<(const PHIUsageRecord &RHS) const {1045if (PHIId < RHS.PHIId) return true;1046if (PHIId > RHS.PHIId) return false;1047if (Shift < RHS.Shift) return true;1048if (Shift > RHS.Shift) return false;1049return Inst->getType()->getPrimitiveSizeInBits() <1050RHS.Inst->getType()->getPrimitiveSizeInBits();1051}1052};10531054struct LoweredPHIRecord {1055PHINode *PN; // The PHI that was lowered.1056unsigned Shift; // The amount shifted.1057unsigned Width; // The width extracted.10581059LoweredPHIRecord(PHINode *Phi, unsigned Sh, Type *Ty)1060: PN(Phi), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}10611062// Ctor form used by DenseMap.1063LoweredPHIRecord(PHINode *Phi, unsigned Sh) : PN(Phi), Shift(Sh), Width(0) {}1064};1065} // namespace10661067namespace llvm {1068template<>1069struct DenseMapInfo<LoweredPHIRecord> {1070static inline LoweredPHIRecord getEmptyKey() {1071return LoweredPHIRecord(nullptr, 0);1072}1073static inline LoweredPHIRecord getTombstoneKey() {1074return LoweredPHIRecord(nullptr, 1);1075}1076static unsigned getHashValue(const LoweredPHIRecord &Val) {1077return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^1078(Val.Width>>3);1079}1080static bool isEqual(const LoweredPHIRecord &LHS,1081const LoweredPHIRecord &RHS) {1082return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&1083LHS.Width == RHS.Width;1084}1085};1086} // namespace llvm108710881089/// This is an integer PHI and we know that it has an illegal type: see if it is1090/// only used by trunc or trunc(lshr) operations. If so, we split the PHI into1091/// the various pieces being extracted. This sort of thing is introduced when1092/// SROA promotes an aggregate to large integer values.1093///1094/// TODO: The user of the trunc may be an bitcast to float/double/vector or an1095/// inttoptr. We should produce new PHIs in the right type.1096///1097Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {1098// PHIUsers - Keep track of all of the truncated values extracted from a set1099// of PHIs, along with their offset. These are the things we want to rewrite.1100SmallVector<PHIUsageRecord, 16> PHIUsers;11011102// PHIs are often mutually cyclic, so we keep track of a whole set of PHI1103// nodes which are extracted from. PHIsToSlice is a set we use to avoid1104// revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to1105// check the uses of (to ensure they are all extracts).1106SmallVector<PHINode*, 8> PHIsToSlice;1107SmallPtrSet<PHINode*, 8> PHIsInspected;11081109PHIsToSlice.push_back(&FirstPhi);1110PHIsInspected.insert(&FirstPhi);11111112for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {1113PHINode *PN = PHIsToSlice[PHIId];11141115// Scan the input list of the PHI. If any input is an invoke, and if the1116// input is defined in the predecessor, then we won't be split the critical1117// edge which is required to insert a truncate. Because of this, we have to1118// bail out.1119for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {1120BasicBlock *BB = std::get<0>(Incoming);1121Value *V = std::get<1>(Incoming);1122InvokeInst *II = dyn_cast<InvokeInst>(V);1123if (!II)1124continue;1125if (II->getParent() != BB)1126continue;11271128// If we have a phi, and if it's directly in the predecessor, then we have1129// a critical edge where we need to put the truncate. Since we can't1130// split the edge in instcombine, we have to bail out.1131return nullptr;1132}11331134// If the incoming value is a PHI node before a catchswitch, we cannot1135// extract the value within that BB because we cannot insert any non-PHI1136// instructions in the BB.1137for (auto *Pred : PN->blocks())1138if (Pred->getFirstInsertionPt() == Pred->end())1139return nullptr;11401141for (User *U : PN->users()) {1142Instruction *UserI = cast<Instruction>(U);11431144// If the user is a PHI, inspect its uses recursively.1145if (PHINode *UserPN = dyn_cast<PHINode>(UserI)) {1146if (PHIsInspected.insert(UserPN).second)1147PHIsToSlice.push_back(UserPN);1148continue;1149}11501151// Truncates are always ok.1152if (isa<TruncInst>(UserI)) {1153PHIUsers.push_back(PHIUsageRecord(PHIId, 0, UserI));1154continue;1155}11561157// Otherwise it must be a lshr which can only be used by one trunc.1158if (UserI->getOpcode() != Instruction::LShr ||1159!UserI->hasOneUse() || !isa<TruncInst>(UserI->user_back()) ||1160!isa<ConstantInt>(UserI->getOperand(1)))1161return nullptr;11621163// Bail on out of range shifts.1164unsigned SizeInBits = UserI->getType()->getScalarSizeInBits();1165if (cast<ConstantInt>(UserI->getOperand(1))->getValue().uge(SizeInBits))1166return nullptr;11671168unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue();1169PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back()));1170}1171}11721173// If we have no users, they must be all self uses, just nuke the PHI.1174if (PHIUsers.empty())1175return replaceInstUsesWith(FirstPhi, PoisonValue::get(FirstPhi.getType()));11761177// If this phi node is transformable, create new PHIs for all the pieces1178// extracted out of it. First, sort the users by their offset and size.1179array_pod_sort(PHIUsers.begin(), PHIUsers.end());11801181LLVM_DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';1182for (unsigned I = 1; I != PHIsToSlice.size(); ++I) dbgs()1183<< "AND USER PHI #" << I << ": " << *PHIsToSlice[I] << '\n');11841185// PredValues - This is a temporary used when rewriting PHI nodes. It is1186// hoisted out here to avoid construction/destruction thrashing.1187DenseMap<BasicBlock*, Value*> PredValues;11881189// ExtractedVals - Each new PHI we introduce is saved here so we don't1190// introduce redundant PHIs.1191DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;11921193for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {1194unsigned PHIId = PHIUsers[UserI].PHIId;1195PHINode *PN = PHIsToSlice[PHIId];1196unsigned Offset = PHIUsers[UserI].Shift;1197Type *Ty = PHIUsers[UserI].Inst->getType();11981199PHINode *EltPHI;12001201// If we've already lowered a user like this, reuse the previously lowered1202// value.1203if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == nullptr) {12041205// Otherwise, Create the new PHI node for this user.1206EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),1207PN->getName() + ".off" + Twine(Offset),1208PN->getIterator());1209assert(EltPHI->getType() != PN->getType() &&1210"Truncate didn't shrink phi?");12111212for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {1213BasicBlock *Pred = std::get<0>(Incoming);1214Value *InVal = std::get<1>(Incoming);1215Value *&PredVal = PredValues[Pred];12161217// If we already have a value for this predecessor, reuse it.1218if (PredVal) {1219EltPHI->addIncoming(PredVal, Pred);1220continue;1221}12221223// Handle the PHI self-reuse case.1224if (InVal == PN) {1225PredVal = EltPHI;1226EltPHI->addIncoming(PredVal, Pred);1227continue;1228}12291230if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {1231// If the incoming value was a PHI, and if it was one of the PHIs we1232// already rewrote it, just use the lowered value.1233if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {1234PredVal = Res;1235EltPHI->addIncoming(PredVal, Pred);1236continue;1237}1238}12391240// Otherwise, do an extract in the predecessor.1241Builder.SetInsertPoint(Pred->getTerminator());1242Value *Res = InVal;1243if (Offset)1244Res = Builder.CreateLShr(1245Res, ConstantInt::get(InVal->getType(), Offset), "extract");1246Res = Builder.CreateTrunc(Res, Ty, "extract.t");1247PredVal = Res;1248EltPHI->addIncoming(Res, Pred);12491250// If the incoming value was a PHI, and if it was one of the PHIs we are1251// rewriting, we will ultimately delete the code we inserted. This1252// means we need to revisit that PHI to make sure we extract out the1253// needed piece.1254if (PHINode *OldInVal = dyn_cast<PHINode>(InVal))1255if (PHIsInspected.count(OldInVal)) {1256unsigned RefPHIId =1257find(PHIsToSlice, OldInVal) - PHIsToSlice.begin();1258PHIUsers.push_back(1259PHIUsageRecord(RefPHIId, Offset, cast<Instruction>(Res)));1260++UserE;1261}1262}1263PredValues.clear();12641265LLVM_DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": "1266<< *EltPHI << '\n');1267ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;1268}12691270// Replace the use of this piece with the PHI node.1271replaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);1272}12731274// Replace all the remaining uses of the PHI nodes (self uses and the lshrs)1275// with poison.1276Value *Poison = PoisonValue::get(FirstPhi.getType());1277for (PHINode *PHI : drop_begin(PHIsToSlice))1278replaceInstUsesWith(*PHI, Poison);1279return replaceInstUsesWith(FirstPhi, Poison);1280}12811282static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,1283const DominatorTree &DT) {1284// Simplify the following patterns:1285// if (cond)1286// / \1287// ... ...1288// \ /1289// phi [true] [false]1290// and1291// switch (cond)1292// case v1: / \ case v2:1293// ... ...1294// \ /1295// phi [v1] [v2]1296// Make sure all inputs are constants.1297if (!all_of(PN.operands(), [](Value *V) { return isa<ConstantInt>(V); }))1298return nullptr;12991300BasicBlock *BB = PN.getParent();1301// Do not bother with unreachable instructions.1302if (!DT.isReachableFromEntry(BB))1303return nullptr;13041305// Determine which value the condition of the idom has for which successor.1306LLVMContext &Context = PN.getContext();1307auto *IDom = DT.getNode(BB)->getIDom()->getBlock();1308Value *Cond;1309SmallDenseMap<ConstantInt *, BasicBlock *, 8> SuccForValue;1310SmallDenseMap<BasicBlock *, unsigned, 8> SuccCount;1311auto AddSucc = [&](ConstantInt *C, BasicBlock *Succ) {1312SuccForValue[C] = Succ;1313++SuccCount[Succ];1314};1315if (auto *BI = dyn_cast<BranchInst>(IDom->getTerminator())) {1316if (BI->isUnconditional())1317return nullptr;13181319Cond = BI->getCondition();1320AddSucc(ConstantInt::getTrue(Context), BI->getSuccessor(0));1321AddSucc(ConstantInt::getFalse(Context), BI->getSuccessor(1));1322} else if (auto *SI = dyn_cast<SwitchInst>(IDom->getTerminator())) {1323Cond = SI->getCondition();1324++SuccCount[SI->getDefaultDest()];1325for (auto Case : SI->cases())1326AddSucc(Case.getCaseValue(), Case.getCaseSuccessor());1327} else {1328return nullptr;1329}13301331if (Cond->getType() != PN.getType())1332return nullptr;13331334// Check that edges outgoing from the idom's terminators dominate respective1335// inputs of the Phi.1336std::optional<bool> Invert;1337for (auto Pair : zip(PN.incoming_values(), PN.blocks())) {1338auto *Input = cast<ConstantInt>(std::get<0>(Pair));1339BasicBlock *Pred = std::get<1>(Pair);1340auto IsCorrectInput = [&](ConstantInt *Input) {1341// The input needs to be dominated by the corresponding edge of the idom.1342// This edge cannot be a multi-edge, as that would imply that multiple1343// different condition values follow the same edge.1344auto It = SuccForValue.find(Input);1345return It != SuccForValue.end() && SuccCount[It->second] == 1 &&1346DT.dominates(BasicBlockEdge(IDom, It->second),1347BasicBlockEdge(Pred, BB));1348};13491350// Depending on the constant, the condition may need to be inverted.1351bool NeedsInvert;1352if (IsCorrectInput(Input))1353NeedsInvert = false;1354else if (IsCorrectInput(cast<ConstantInt>(ConstantExpr::getNot(Input))))1355NeedsInvert = true;1356else1357return nullptr;13581359// Make sure the inversion requirement is always the same.1360if (Invert && *Invert != NeedsInvert)1361return nullptr;13621363Invert = NeedsInvert;1364}13651366if (!*Invert)1367return Cond;13681369// This Phi is actually opposite to branching condition of IDom. We invert1370// the condition that will potentially open up some opportunities for1371// sinking.1372auto InsertPt = BB->getFirstInsertionPt();1373if (InsertPt != BB->end()) {1374Self.Builder.SetInsertPoint(&*BB, InsertPt);1375return Self.Builder.CreateNot(Cond);1376}13771378return nullptr;1379}13801381// Fold iv = phi(start, iv.next = iv2.next op start)1382// where iv2 = phi(iv2.start, iv2.next = iv2 + iv2.step)1383// and iv2.start op start = start1384// to iv = iv2 op start1385static Value *foldDependentIVs(PHINode &PN, IRBuilderBase &Builder) {1386BasicBlock *BB = PN.getParent();1387if (PN.getNumIncomingValues() != 2)1388return nullptr;13891390Value *Start;1391Instruction *IvNext;1392BinaryOperator *Iv2Next;1393auto MatchOuterIV = [&](Value *V1, Value *V2) {1394if (match(V2, m_c_BinOp(m_Specific(V1), m_BinOp(Iv2Next))) ||1395match(V2, m_GEP(m_Specific(V1), m_BinOp(Iv2Next)))) {1396Start = V1;1397IvNext = cast<Instruction>(V2);1398return true;1399}1400return false;1401};14021403if (!MatchOuterIV(PN.getIncomingValue(0), PN.getIncomingValue(1)) &&1404!MatchOuterIV(PN.getIncomingValue(1), PN.getIncomingValue(0)))1405return nullptr;14061407PHINode *Iv2;1408Value *Iv2Start, *Iv2Step;1409if (!matchSimpleRecurrence(Iv2Next, Iv2, Iv2Start, Iv2Step) ||1410Iv2->getParent() != BB)1411return nullptr;14121413auto *BO = dyn_cast<BinaryOperator>(IvNext);1414Constant *Identity =1415BO ? ConstantExpr::getBinOpIdentity(BO->getOpcode(), Iv2Start->getType())1416: Constant::getNullValue(Iv2Start->getType());1417if (Iv2Start != Identity)1418return nullptr;14191420Builder.SetInsertPoint(&*BB, BB->getFirstInsertionPt());1421if (!BO) {1422auto *GEP = cast<GEPOperator>(IvNext);1423return Builder.CreateGEP(GEP->getSourceElementType(), Start, Iv2, "",1424cast<GEPOperator>(IvNext)->getNoWrapFlags());1425}14261427assert(BO->isCommutative() && "Must be commutative");1428Value *Res = Builder.CreateBinOp(BO->getOpcode(), Iv2, Start);1429cast<Instruction>(Res)->copyIRFlags(BO);1430return Res;1431}14321433// PHINode simplification1434//1435Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {1436if (Value *V = simplifyInstruction(&PN, SQ.getWithInstruction(&PN)))1437return replaceInstUsesWith(PN, V);14381439if (Instruction *Result = foldPHIArgZextsIntoPHI(PN))1440return Result;14411442if (Instruction *Result = foldPHIArgIntToPtrToPHI(PN))1443return Result;14441445// If all PHI operands are the same operation, pull them through the PHI,1446// reducing code size.1447auto *Inst0 = dyn_cast<Instruction>(PN.getIncomingValue(0));1448auto *Inst1 = dyn_cast<Instruction>(PN.getIncomingValue(1));1449if (Inst0 && Inst1 && Inst0->getOpcode() == Inst1->getOpcode() &&1450Inst0->hasOneUser())1451if (Instruction *Result = foldPHIArgOpIntoPHI(PN))1452return Result;14531454// If the incoming values are pointer casts of the same original value,1455// replace the phi with a single cast iff we can insert a non-PHI instruction.1456if (PN.getType()->isPointerTy() &&1457PN.getParent()->getFirstInsertionPt() != PN.getParent()->end()) {1458Value *IV0 = PN.getIncomingValue(0);1459Value *IV0Stripped = IV0->stripPointerCasts();1460// Set to keep track of values known to be equal to IV0Stripped after1461// stripping pointer casts.1462SmallPtrSet<Value *, 4> CheckedIVs;1463CheckedIVs.insert(IV0);1464if (IV0 != IV0Stripped &&1465all_of(PN.incoming_values(), [&CheckedIVs, IV0Stripped](Value *IV) {1466return !CheckedIVs.insert(IV).second ||1467IV0Stripped == IV->stripPointerCasts();1468})) {1469return CastInst::CreatePointerCast(IV0Stripped, PN.getType());1470}1471}14721473// If this is a trivial cycle in the PHI node graph, remove it. Basically, if1474// this PHI only has a single use (a PHI), and if that PHI only has one use (a1475// PHI)... break the cycle.1476if (PN.hasOneUse()) {1477if (foldIntegerTypedPHI(PN))1478return nullptr;14791480Instruction *PHIUser = cast<Instruction>(PN.user_back());1481if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {1482SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;1483PotentiallyDeadPHIs.insert(&PN);1484if (isDeadPHICycle(PU, PotentiallyDeadPHIs))1485return replaceInstUsesWith(PN, PoisonValue::get(PN.getType()));1486}14871488// If this phi has a single use, and if that use just computes a value for1489// the next iteration of a loop, delete the phi. This occurs with unused1490// induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this1491// common case here is good because the only other things that catch this1492// are induction variable analysis (sometimes) and ADCE, which is only run1493// late.1494if (PHIUser->hasOneUse() &&1495(isa<BinaryOperator>(PHIUser) || isa<UnaryOperator>(PHIUser) ||1496isa<GetElementPtrInst>(PHIUser)) &&1497PHIUser->user_back() == &PN) {1498return replaceInstUsesWith(PN, PoisonValue::get(PN.getType()));1499}1500}15011502// When a PHI is used only to be compared with zero, it is safe to replace1503// an incoming value proved as known nonzero with any non-zero constant.1504// For example, in the code below, the incoming value %v can be replaced1505// with any non-zero constant based on the fact that the PHI is only used to1506// be compared with zero and %v is a known non-zero value:1507// %v = select %cond, 1, 21508// %p = phi [%v, BB] ...1509// icmp eq, %p, 01510// FIXME: To be simple, handle only integer type for now.1511// This handles a small number of uses to keep the complexity down, and an1512// icmp(or(phi)) can equally be replaced with any non-zero constant as the1513// "or" will only add bits.1514if (!PN.hasNUsesOrMore(3)) {1515SmallVector<Instruction *> DropPoisonFlags;1516bool AllUsesOfPhiEndsInCmp = all_of(PN.users(), [&](User *U) {1517auto *CmpInst = dyn_cast<ICmpInst>(U);1518if (!CmpInst) {1519// This is always correct as OR only add bits and we are checking1520// against 0.1521if (U->hasOneUse() && match(U, m_c_Or(m_Specific(&PN), m_Value()))) {1522DropPoisonFlags.push_back(cast<Instruction>(U));1523CmpInst = dyn_cast<ICmpInst>(U->user_back());1524}1525}1526if (!CmpInst || !isa<IntegerType>(PN.getType()) ||1527!CmpInst->isEquality() || !match(CmpInst->getOperand(1), m_Zero())) {1528return false;1529}1530return true;1531});1532// All uses of PHI results in a compare with zero.1533if (AllUsesOfPhiEndsInCmp) {1534ConstantInt *NonZeroConst = nullptr;1535bool MadeChange = false;1536for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {1537Instruction *CtxI = PN.getIncomingBlock(I)->getTerminator();1538Value *VA = PN.getIncomingValue(I);1539if (isKnownNonZero(VA, getSimplifyQuery().getWithInstruction(CtxI))) {1540if (!NonZeroConst)1541NonZeroConst = getAnyNonZeroConstInt(PN);1542if (NonZeroConst != VA) {1543replaceOperand(PN, I, NonZeroConst);1544// The "disjoint" flag may no longer hold after the transform.1545for (Instruction *I : DropPoisonFlags)1546I->dropPoisonGeneratingFlags();1547MadeChange = true;1548}1549}1550}1551if (MadeChange)1552return &PN;1553}1554}15551556// We sometimes end up with phi cycles that non-obviously end up being the1557// same value, for example:1558// z = some value; x = phi (y, z); y = phi (x, z)1559// where the phi nodes don't necessarily need to be in the same block. Do a1560// quick check to see if the PHI node only contains a single non-phi value, if1561// so, scan to see if the phi cycle is actually equal to that value. If the1562// phi has no non-phi values then allow the "NonPhiInVal" to be set later if1563// one of the phis itself does not have a single input.1564{1565unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();1566// Scan for the first non-phi operand.1567while (InValNo != NumIncomingVals &&1568isa<PHINode>(PN.getIncomingValue(InValNo)))1569++InValNo;15701571Value *NonPhiInVal =1572InValNo != NumIncomingVals ? PN.getIncomingValue(InValNo) : nullptr;15731574// Scan the rest of the operands to see if there are any conflicts, if so1575// there is no need to recursively scan other phis.1576if (NonPhiInVal)1577for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {1578Value *OpVal = PN.getIncomingValue(InValNo);1579if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))1580break;1581}15821583// If we scanned over all operands, then we have one unique value plus1584// phi values. Scan PHI nodes to see if they all merge in each other or1585// the value.1586if (InValNo == NumIncomingVals) {1587SmallPtrSet<PHINode *, 16> ValueEqualPHIs;1588if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))1589return replaceInstUsesWith(PN, NonPhiInVal);1590}1591}15921593// If there are multiple PHIs, sort their operands so that they all list1594// the blocks in the same order. This will help identical PHIs be eliminated1595// by other passes. Other passes shouldn't depend on this for correctness1596// however.1597auto Res = PredOrder.try_emplace(PN.getParent());1598if (!Res.second) {1599const auto &Preds = Res.first->second;1600for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {1601BasicBlock *BBA = PN.getIncomingBlock(I);1602BasicBlock *BBB = Preds[I];1603if (BBA != BBB) {1604Value *VA = PN.getIncomingValue(I);1605unsigned J = PN.getBasicBlockIndex(BBB);1606Value *VB = PN.getIncomingValue(J);1607PN.setIncomingBlock(I, BBB);1608PN.setIncomingValue(I, VB);1609PN.setIncomingBlock(J, BBA);1610PN.setIncomingValue(J, VA);1611// NOTE: Instcombine normally would want us to "return &PN" if we1612// modified any of the operands of an instruction. However, since we1613// aren't adding or removing uses (just rearranging them) we don't do1614// this in this case.1615}1616}1617} else {1618// Remember the block order of the first encountered phi node.1619append_range(Res.first->second, PN.blocks());1620}16211622// Is there an identical PHI node in this basic block?1623for (PHINode &IdenticalPN : PN.getParent()->phis()) {1624// Ignore the PHI node itself.1625if (&IdenticalPN == &PN)1626continue;1627// Note that even though we've just canonicalized this PHI, due to the1628// worklist visitation order, there are no guarantess that *every* PHI1629// has been canonicalized, so we can't just compare operands ranges.1630if (!PN.isIdenticalToWhenDefined(&IdenticalPN))1631continue;1632// Just use that PHI instead then.1633++NumPHICSEs;1634return replaceInstUsesWith(PN, &IdenticalPN);1635}16361637// If this is an integer PHI and we know that it has an illegal type, see if1638// it is only used by trunc or trunc(lshr) operations. If so, we split the1639// PHI into the various pieces being extracted. This sort of thing is1640// introduced when SROA promotes an aggregate to a single large integer type.1641if (PN.getType()->isIntegerTy() &&1642!DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))1643if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))1644return Res;16451646// Ultimately, try to replace this Phi with a dominating condition.1647if (auto *V = simplifyUsingControlFlow(*this, PN, DT))1648return replaceInstUsesWith(PN, V);16491650if (Value *Res = foldDependentIVs(PN, Builder))1651return replaceInstUsesWith(PN, Res);16521653return nullptr;1654}165516561657