CoCalc -- InstCombineLoadStoreAlloca.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
35266 views
1
//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the visit functions for load, store and alloca.
10
//
11
//===----------------------------------------------------------------------===//
12

13
#include "InstCombineInternal.h"
14
#include "llvm/ADT/MapVector.h"
15
#include "llvm/ADT/SmallString.h"
16
#include "llvm/ADT/Statistic.h"
17
#include "llvm/Analysis/AliasAnalysis.h"
18
#include "llvm/Analysis/Loads.h"
19
#include "llvm/IR/DataLayout.h"
20
#include "llvm/IR/DebugInfoMetadata.h"
21
#include "llvm/IR/IntrinsicInst.h"
22
#include "llvm/IR/LLVMContext.h"
23
#include "llvm/IR/PatternMatch.h"
24
#include "llvm/Transforms/InstCombine/InstCombiner.h"
25
#include "llvm/Transforms/Utils/Local.h"
26
using namespace llvm;
27
using namespace PatternMatch;
28

29
#define DEBUG_TYPE "instcombine"
30

31
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
32
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
33

34
static cl::opt<unsigned> MaxCopiedFromConstantUsers(
35
    "instcombine-max-copied-from-constant-users", cl::init(300),
36
    cl::desc("Maximum users to visit in copy from constant transform"),
37
    cl::Hidden);
38

39
namespace llvm {
40
cl::opt<bool> EnableInferAlignmentPass(
41
    "enable-infer-alignment-pass", cl::init(true), cl::Hidden, cl::ZeroOrMore,
42
    cl::desc("Enable the InferAlignment pass, disabling alignment inference in "
43
             "InstCombine"));
44
}
45

46
/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
47
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
48
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
49
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
50
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
51
/// the alloca, and if the source pointer is a pointer to a constant memory
52
/// location, we can optimize this.
53
static bool
54
isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V,
55
                               MemTransferInst *&TheCopy,
56
                               SmallVectorImpl<Instruction *> &ToDelete) {
57
  // We track lifetime intrinsics as we encounter them.  If we decide to go
58
  // ahead and replace the value with the memory location, this lets the caller
59
  // quickly eliminate the markers.
60

61
  using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
62
  SmallVector<ValueAndIsOffset, 32> Worklist;
63
  SmallPtrSet<ValueAndIsOffset, 32> Visited;
64
  Worklist.emplace_back(V, false);
65
  while (!Worklist.empty()) {
66
    ValueAndIsOffset Elem = Worklist.pop_back_val();
67
    if (!Visited.insert(Elem).second)
68
      continue;
69
    if (Visited.size() > MaxCopiedFromConstantUsers)
70
      return false;
71

72
    const auto [Value, IsOffset] = Elem;
73
    for (auto &U : Value->uses()) {
74
      auto *I = cast<Instruction>(U.getUser());
75

76
      if (auto *LI = dyn_cast<LoadInst>(I)) {
77
        // Ignore non-volatile loads, they are always ok.
78
        if (!LI->isSimple()) return false;
79
        continue;
80
      }
81

82
      if (isa<PHINode, SelectInst>(I)) {
83
        // We set IsOffset=true, to forbid the memcpy from occurring after the
84
        // phi: If one of the phi operands is not based on the alloca, we
85
        // would incorrectly omit a write.
86
        Worklist.emplace_back(I, true);
87
        continue;
88
      }
89
      if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
90
        // If uses of the bitcast are ok, we are ok.
91
        Worklist.emplace_back(I, IsOffset);
92
        continue;
93
      }
94
      if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
95
        // If the GEP has all zero indices, it doesn't offset the pointer. If it
96
        // doesn't, it does.
97
        Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
98
        continue;
99
      }
100

101
      if (auto *Call = dyn_cast<CallBase>(I)) {
102
        // If this is the function being called then we treat it like a load and
103
        // ignore it.
104
        if (Call->isCallee(&U))
105
          continue;
106

107
        unsigned DataOpNo = Call->getDataOperandNo(&U);
108
        bool IsArgOperand = Call->isArgOperand(&U);
109

110
        // Inalloca arguments are clobbered by the call.
111
        if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
112
          return false;
113

114
        // If this call site doesn't modify the memory, then we know it is just
115
        // a load (but one that potentially returns the value itself), so we can
116
        // ignore it if we know that the value isn't captured.
117
        bool NoCapture = Call->doesNotCapture(DataOpNo);
118
        if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) ||
119
            (Call->onlyReadsMemory(DataOpNo) && NoCapture))
120
          continue;
121

122
        // If this is being passed as a byval argument, the caller is making a
123
        // copy, so it is only a read of the alloca.
124
        if (IsArgOperand && Call->isByValArgument(DataOpNo))
125
          continue;
126
      }
127

128
      // Lifetime intrinsics can be handled by the caller.
129
      if (I->isLifetimeStartOrEnd()) {
130
        assert(I->use_empty() && "Lifetime markers have no result to use!");
131
        ToDelete.push_back(I);
132
        continue;
133
      }
134

135
      // If this is isn't our memcpy/memmove, reject it as something we can't
136
      // handle.
137
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
138
      if (!MI)
139
        return false;
140

141
      // If the transfer is volatile, reject it.
142
      if (MI->isVolatile())
143
        return false;
144

145
      // If the transfer is using the alloca as a source of the transfer, then
146
      // ignore it since it is a load (unless the transfer is volatile).
147
      if (U.getOperandNo() == 1)
148
        continue;
149

150
      // If we already have seen a copy, reject the second one.
151
      if (TheCopy) return false;
152

153
      // If the pointer has been offset from the start of the alloca, we can't
154
      // safely handle this.
155
      if (IsOffset) return false;
156

157
      // If the memintrinsic isn't using the alloca as the dest, reject it.
158
      if (U.getOperandNo() != 0) return false;
159

160
      // If the source of the memcpy/move is not constant, reject it.
161
      if (isModSet(AA->getModRefInfoMask(MI->getSource())))
162
        return false;
163

164
      // Otherwise, the transform is safe.  Remember the copy instruction.
165
      TheCopy = MI;
166
    }
167
  }
168
  return true;
169
}
170

171
/// isOnlyCopiedFromConstantMemory - Return true if the specified alloca is only
172
/// modified by a copy from a constant memory location. If we can prove this, we
173
/// can replace any uses of the alloca with uses of the memory location
174
/// directly.
175
static MemTransferInst *
176
isOnlyCopiedFromConstantMemory(AAResults *AA,
177
                               AllocaInst *AI,
178
                               SmallVectorImpl<Instruction *> &ToDelete) {
179
  MemTransferInst *TheCopy = nullptr;
180
  if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
181
    return TheCopy;
182
  return nullptr;
183
}
184

185
/// Returns true if V is dereferenceable for size of alloca.
186
static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
187
                                           const DataLayout &DL) {
188
  if (AI->isArrayAllocation())
189
    return false;
190
  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
191
  if (!AllocaSize)
192
    return false;
193
  return isDereferenceableAndAlignedPointer(V, AI->getAlign(),
194
                                            APInt(64, AllocaSize), DL);
195
}
196

197
/// Canonicalise the array-size operand of \p AI.
///
/// In order, the rewrites attempted are:
///  * a scalar allocation gets `i32 1` as its array size;
///  * a constant array size C (fitting in 64 bits) becomes an alloca of
///    `[C x Ty]` with array size 1;
///  * an undef array size turns the alloca into a null pointer;
///  * any other array size is cast to the pointer's index type.
///
/// \returns the instruction produced by the rewrite that applied, or nullptr
/// if the alloca is already in canonical form.
static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
                                            AllocaInst &AI, DominatorTree &DT) {
  Value *ArraySize = AI.getArraySize();

  // Scalar allocation: the canonical array size is the constant i32 1.
  if (!AI.isArrayAllocation()) {
    if (ArraySize->getType()->isIntegerTy(32))
      return nullptr;
    return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
  }

  // alloca Ty, C (with constant C != 1)  ==>  alloca [C x Ty], 1
  if (const auto *C = dyn_cast<ConstantInt>(ArraySize);
      C && C->getValue().getActiveBits() <= 64) {
    Type *ArrayTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
    AllocaInst *Replacement = IC.Builder.CreateAlloca(
        ArrayTy, AI.getAddressSpace(), nullptr, AI.getName());
    Replacement->setAlignment(AI.getAlign());
    Replacement->setUsedWithInAlloca(AI.isUsedWithInAlloca());

    // Keep debug info pointing at the new alloca.
    replaceAllDbgUsesWith(AI, *Replacement, *Replacement, DT);
    return IC.replaceInstUsesWith(AI, Replacement);
  }

  // An undef element count: fold the alloca to a null pointer.
  if (isa<UndefValue>(ArraySize))
    return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

  // Make the array size operand use the index type of the alloca's pointer
  // (intptr_t in the typical case), so that any cast is exposed early.
  Type *IndexTy = IC.getDataLayout().getIndexType(AI.getType());
  if (ArraySize->getType() == IndexTy)
    return nullptr;

  Value *Casted = IC.Builder.CreateIntCast(ArraySize, IndexTy, false);
  return IC.replaceOperand(AI, 0, Casted);
}
237

238
namespace {
239
// If I and V are pointers in different address space, it is not allowed to
240
// use replaceAllUsesWith since I and V have different types. A
241
// non-target-specific transformation should not use addrspacecast on V since
242
// the two address space may be disjoint depending on target.
243
//
244
// This class chases down uses of the old pointer until reaching the load
245
// instructions, then replaces the old pointer in the load instructions with
246
// the new pointer. If during the chasing it sees bitcast or GEP, it will
247
// create new bitcast or GEP with the new pointer and use them in the load
248
// instruction.
249
class PointerReplacer {
250
public:
251
  PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
252
      : IC(IC), Root(Root), FromAS(SrcAS) {}
253

254
  bool collectUsers();
255
  void replacePointer(Value *V);
256

257
private:
258
  bool collectUsersRecursive(Instruction &I);
259
  void replace(Instruction *I);
260
  Value *getReplacement(Value *I);
261
  bool isAvailable(Instruction *I) const {
262
    return I == &Root || Worklist.contains(I);
263
  }
264

265
  bool isEqualOrValidAddrSpaceCast(const Instruction *I,
266
                                   unsigned FromAS) const {
267
    const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
268
    if (!ASC)
269
      return false;
270
    unsigned ToAS = ASC->getDestAddressSpace();
271
    return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
272
  }
273

274
  SmallPtrSet<Instruction *, 32> ValuesToRevisit;
275
  SmallSetVector<Instruction *, 4> Worklist;
276
  MapVector<Value *, Value *> WorkMap;
277
  InstCombinerImpl &IC;
278
  Instruction &Root;
279
  unsigned FromAS;
280
};
281
} // end anonymous namespace
282

283
bool PointerReplacer::collectUsers() {
284
  if (!collectUsersRecursive(Root))
285
    return false;
286

287
  // Ensure that all outstanding (indirect) users of I
288
  // are inserted into the Worklist. Return false
289
  // otherwise.
290
  for (auto *Inst : ValuesToRevisit)
291
    if (!Worklist.contains(Inst))
292
      return false;
293
  return true;
294
}
295

296
/// Visit the users of \p I and put those that need rewriting on the worklist,
/// recursing through instructions that produce derived pointers.
///
/// \returns false as soon as a user is found that cannot be rewritten: a
/// volatile load or memory transfer, a PHI/select with a non-instruction
/// operand, or any instruction kind not handled here.
bool PointerReplacer::collectUsersRecursive(Instruction &I) {
  for (User *U : I.users()) {
    auto *Inst = cast<Instruction>(U);

    if (auto *Load = dyn_cast<LoadInst>(Inst)) {
      if (Load->isVolatile())
        return false;
      Worklist.insert(Load);
      continue;
    }

    if (auto *PHI = dyn_cast<PHINode>(Inst)) {
      // Only PHIs whose incoming values are all instructions are replaceable.
      const bool OnlyInstOperands =
          all_of(PHI->incoming_values(),
                 [](Value *V) { return isa<Instruction>(V); });
      if (!OnlyInstOperands)
        return false;

      // Postpone the PHI while some incoming value has not been collected
      // yet; collectUsers checks later that it was picked up after all.
      const bool OperandsCollected =
          all_of(PHI->incoming_values(), [this](Value *V) {
            return isAvailable(cast<Instruction>(V));
          });
      if (!OperandsCollected) {
        ValuesToRevisit.insert(Inst);
        continue;
      }

      Worklist.insert(PHI);
      if (!collectUsersRecursive(*PHI))
        return false;
      continue;
    }

    if (auto *SI = dyn_cast<SelectInst>(Inst)) {
      // Same rules as for PHIs, applied to the two select arms.
      auto *TrueInst = dyn_cast<Instruction>(SI->getTrueValue());
      auto *FalseInst = dyn_cast<Instruction>(SI->getFalseValue());
      if (!TrueInst || !FalseInst)
        return false;

      if (!isAvailable(TrueInst) || !isAvailable(FalseInst)) {
        ValuesToRevisit.insert(Inst);
        continue;
      }

      Worklist.insert(SI);
      if (!collectUsersRecursive(*SI))
        return false;
      continue;
    }

    if (isa<GetElementPtrInst>(Inst)) {
      Worklist.insert(Inst);
      if (!collectUsersRecursive(*Inst))
        return false;
      continue;
    }

    if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
      if (MI->isVolatile())
        return false;
      Worklist.insert(Inst);
      continue;
    }

    if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
      Worklist.insert(Inst);
      if (!collectUsersRecursive(*Inst))
        return false;
      continue;
    }

    // Lifetime markers need no rewriting here.
    if (Inst->isLifetimeStartOrEnd())
      continue;

    // TODO: For arbitrary uses with address space mismatches, should we check
    // if we can introduce a valid addrspacecast?
    LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
    return false;
  }

  return true;
}
359

360
/// Look up the value recorded as the replacement of \p V, or nullptr if no
/// replacement has been recorded for it.
Value *PointerReplacer::getReplacement(Value *V) {
  auto Entry = WorkMap.find(V);
  if (Entry == WorkMap.end())
    return nullptr;
  return Entry->second;
}
361

362
/// Build the replacement for the worklist instruction \p I on top of the
/// already-replaced operands and record it in WorkMap.
///
/// Handles loads, PHIs, GEPs, selects, memory transfers and address-space
/// casts; any other instruction kind is a programming error. Operands are
/// expected to have been replaced before their users, which the worklist
/// order established by collectUsers is relied upon to guarantee.
void PointerReplacer::replace(Instruction *I) {
  // Nothing to do if a replacement has already been recorded.
  if (getReplacement(I))
    return;

  if (auto *LT = dyn_cast<LoadInst>(I)) {
    auto *V = getReplacement(LT->getPointerOperand());
    assert(V && "Operand not replaced");
    // Re-create the load with identical attributes on the new pointer.
    auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
                              LT->getAlign(), LT->getOrdering(),
                              LT->getSyncScopeID());
    NewI->takeName(LT);
    copyMetadataForLoad(*NewI, *LT);

    IC.InsertNewInstWith(NewI, LT->getIterator());
    IC.replaceInstUsesWith(*LT, NewI);
    WorkMap[LT] = NewI;
  } else if (auto *PHI = dyn_cast<PHINode>(I)) {
    // The new PHI takes its type from the replacement of the first incoming
    // value. Check it like the other branches do instead of dereferencing a
    // possibly missing replacement unchecked.
    Value *FirstIncoming = getReplacement(PHI->getIncomingValue(0));
    assert(FirstIncoming && "Operand not replaced");
    Type *NewTy = FirstIncoming->getType();
    auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
                                   PHI->getName(), PHI->getIterator());
    // Note: the index must not be called 'I', which would shadow the
    // instruction parameter of this function.
    for (unsigned Idx = 0, E = PHI->getNumIncomingValues(); Idx != E; ++Idx)
      NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(Idx)),
                          PHI->getIncomingBlock(Idx));
    WorkMap[PHI] = NewPHI;
  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
    auto *V = getReplacement(GEP->getPointerOperand());
    assert(V && "Operand not replaced");
    // Same source element type and indices, new base pointer.
    SmallVector<Value *, 8> Indices(GEP->indices());
    auto *NewI =
        GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
    IC.InsertNewInstWith(NewI, GEP->getIterator());
    NewI->takeName(GEP);
    NewI->setNoWrapFlags(GEP->getNoWrapFlags());
    WorkMap[GEP] = NewI;
  } else if (auto *SI = dyn_cast<SelectInst>(I)) {
    // Use the replacement of each arm where one exists, the original arm
    // otherwise.
    Value *TrueValue = SI->getTrueValue();
    Value *FalseValue = SI->getFalseValue();
    if (Value *Replacement = getReplacement(TrueValue))
      TrueValue = Replacement;
    if (Value *Replacement = getReplacement(FalseValue))
      FalseValue = Replacement;
    auto *NewSI = SelectInst::Create(SI->getCondition(), TrueValue, FalseValue,
                                     SI->getName(), nullptr, SI);
    IC.InsertNewInstWith(NewSI, SI->getIterator());
    NewSI->takeName(SI);
    WorkMap[SI] = NewSI;
  } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
    // Either side of the transfer may be based on the replaced pointer.
    auto *DestV = MemCpy->getRawDest();
    auto *SrcV = MemCpy->getRawSource();

    if (auto *DestReplace = getReplacement(DestV))
      DestV = DestReplace;
    if (auto *SrcReplace = getReplacement(SrcV))
      SrcV = SrcReplace;

    IC.Builder.SetInsertPoint(MemCpy);
    auto *NewI = IC.Builder.CreateMemTransferInst(
        MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
        MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
    AAMDNodes AAMD = MemCpy->getAAMetadata();
    if (AAMD)
      NewI->setAAMetadata(AAMD);

    // The old transfer is removed right away; the new one stands in for it.
    IC.eraseInstFromFunction(*MemCpy);
    WorkMap[MemCpy] = NewI;
  } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
    auto *V = getReplacement(ASC->getPointerOperand());
    assert(V && "Operand not replaced");
    assert(isEqualOrValidAddrSpaceCast(
               ASC, V->getType()->getPointerAddressSpace()) &&
           "Invalid address space cast!");

    if (V->getType()->getPointerAddressSpace() !=
        ASC->getType()->getPointerAddressSpace()) {
      // A cast is still required to reach the destination address space.
      auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
      NewI->takeName(ASC);
      IC.InsertNewInstWith(NewI, ASC->getIterator());
      WorkMap[ASC] = NewI;
    } else {
      // The new pointer already lives in the destination address space, so
      // the cast becomes a no-op and the operand itself is the replacement.
      WorkMap[ASC] = V;
    }

  } else {
    llvm_unreachable("should never reach here");
  }
}
448

449
/// Rewrite all collected users so that they are based on \p V instead of the
/// root. \p V must have a pointer type different from the root's; when the
/// types are equal this class is not the right tool.
void PointerReplacer::replacePointer(Value *V) {
  assert(cast<PointerType>(Root.getType()) != cast<PointerType>(V->getType()) &&
         "Invalid usage");

  // Seed the map with the root, then rewrite the users in collection order.
  WorkMap[&Root] = V;
  for (Instruction *Collected : Worklist)
    replace(Collected);
}
460

461
/// Combine an alloca instruction.
///
/// Steps, in order:
///  1. canonicalise the array-size operand;
///  2. move zero-sized allocas to the start of the entry block and merge
///     them into a single one;
///  3. replace an alloca that is only initialised by a copy from constant
///     memory with that constant memory;
///  4. hand over to the generic allocation-site handling.
Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
  if (Instruction *Simplified = simplifyAllocaArraySize(*this, AI, DT))
    return Simplified;

  // Zero-byte allocas are hoisted into the entry block and merged. This is
  // done for allocas only: malloc has to return a unique pointer even for a
  // zero byte allocation.
  Type *AllocTy = AI.getAllocatedType();
  if (AllocTy->isSized() &&
      DL.getTypeAllocSize(AllocTy).getKnownMinValue() == 0) {
    // An array of zero-sized objects is pointless; dropping the array size
    // also frees a possibly complicated expression that is not used anywhere
    // else.
    if (AI.isArrayAllocation())
      return replaceOperand(AI, 0,
                            ConstantInt::get(AI.getArraySize()->getType(), 1));

    BasicBlock &EntryBlock = AI.getFunction()->getEntryBlock();
    Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
    if (FirstInst != &AI) {
      auto *EntryAI = dyn_cast<AllocaInst>(FirstInst);
      const bool EntryStartsWithZeroSizedAlloca =
          EntryAI && EntryAI->getAllocatedType()->isSized() &&
          DL.getTypeAllocSize(EntryAI->getAllocatedType())
                  .getKnownMinValue() == 0;

      // No zero-sized alloca at the start of the entry block yet: this one
      // takes that place. Dominance is not a concern because the array size
      // has already been forced to a constant above.
      if (!EntryStartsWithZeroSizedAlloca) {
        AI.moveBefore(FirstInst);
        return &AI;
      }

      // Otherwise reuse the existing one, after making sure it is aligned
      // enough for both types.
      const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
      EntryAI->setAlignment(MaxAlign);
      return replaceInstUsesWith(AI, EntryAI);
    }
  }

  // If the allocation is only ever written by a memcpy/memmove from a memory
  // location that is at least as aligned as the allocation, all users can
  // read that constant memory location directly. Front ends commonly emit
  // this for code like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }"
  // when 'A' is only read afterwards.
  SmallVector<Instruction *, 4> ToDelete;
  auto EraseLifetimeMarkers = [&] {
    for (Instruction *Marker : ToDelete)
      eraseInstFromFunction(*Marker);
  };

  if (MemTransferInst *Copy =
          isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
    Value *TheSrc = Copy->getSource();
    const Align AllocaAlign = AI.getAlign();
    const Align SourceAlign =
        getOrEnforceKnownAlignment(TheSrc, AllocaAlign, DL, &AI, &AC, &DT);

    // FIXME: Can we sink instructions without violating dominance when TheSrc
    // is an instruction instead of a constant or argument?
    const bool SourceIsUsable =
        AllocaAlign <= SourceAlign &&
        isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
        !isa<Instruction>(TheSrc);

    if (SourceIsUsable) {
      LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
      LLVM_DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');

      const unsigned SrcAddrSpace =
          TheSrc->getType()->getPointerAddressSpace();

      // Same address space: a plain use replacement does the job.
      if (AI.getAddressSpace() == SrcAddrSpace) {
        EraseLifetimeMarkers();

        Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
        eraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }

      // Different address spaces: the users have to be rebuilt one by one.
      PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
      if (PtrReplacer.collectUsers()) {
        EraseLifetimeMarkers();

        PtrReplacer.replacePointer(TheSrc);
        ++NumGlobalCopies;
      }
    }
  }

  // Finally let the generic allocation site handler aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}
547

548
// Are we allowed to form a atomic load or store of this type?
549
static bool isSupportedAtomicType(Type *Ty) {
550
  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
551
}
552

553
/// Helper to combine a load to a new type.
554
///
555
/// This just does the work of combining a load to a new type. It handles
556
/// metadata, etc., and returns the new instruction. The \c NewTy should be the
557
/// loaded *value* type. This will convert it to a pointer, cast the operand to
558
/// that pointer type, load it, etc.
559
///
560
/// Note that this will create all of the instructions with whatever insert
561
/// point the \c InstCombinerImpl currently is using.
562
LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
563
                                                 const Twine &Suffix) {
564
  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
565
         "can't fold an atomic load to requested type");
566

567
  LoadInst *NewLoad =
568
      Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
569
                                LI.isVolatile(), LI.getName() + Suffix);
570
  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
571
  copyMetadataForLoad(*NewLoad, LI);
572
  return NewLoad;
573
}
574

575
/// Combine a store to a new type.
576
///
577
/// Returns the newly created store instruction.
578
static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
579
                                         Value *V) {
580
  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
581
         "can't fold an atomic store of requested type");
582

583
  Value *Ptr = SI.getPointerOperand();
584
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
585
  SI.getAllMetadata(MD);
586

587
  StoreInst *NewStore =
588
      IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
589
  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
590
  for (const auto &MDPair : MD) {
591
    unsigned ID = MDPair.first;
592
    MDNode *N = MDPair.second;
593
    // Note, essentially every kind of metadata should be preserved here! This
594
    // routine is supposed to clone a store instruction changing *only its
595
    // type*. The only metadata it makes sense to drop is metadata which is
596
    // invalidated when the pointer type changes. This should essentially
597
    // never be the case in LLVM, but we explicitly switch over only known
598
    // metadata to be conservatively correct. If you are adding metadata to
599
    // LLVM which pertains to stores, you almost certainly want to add it
600
    // here.
601
    switch (ID) {
602
    case LLVMContext::MD_dbg:
603
    case LLVMContext::MD_DIAssignID:
604
    case LLVMContext::MD_tbaa:
605
    case LLVMContext::MD_prof:
606
    case LLVMContext::MD_fpmath:
607
    case LLVMContext::MD_tbaa_struct:
608
    case LLVMContext::MD_alias_scope:
609
    case LLVMContext::MD_noalias:
610
    case LLVMContext::MD_nontemporal:
611
    case LLVMContext::MD_mem_parallel_loop_access:
612
    case LLVMContext::MD_access_group:
613
      // All of these directly apply.
614
      NewStore->setMetadata(ID, N);
615
      break;
616
    case LLVMContext::MD_invariant_load:
617
    case LLVMContext::MD_nonnull:
618
    case LLVMContext::MD_noundef:
619
    case LLVMContext::MD_range:
620
    case LLVMContext::MD_align:
621
    case LLVMContext::MD_dereferenceable:
622
    case LLVMContext::MD_dereferenceable_or_null:
623
      // These don't apply for stores.
624
      break;
625
    }
626
  }
627

628
  return NewStore;
629
}
630

631
/// Combine loads to match the type of their uses' value after looking
632
/// through intervening bitcasts.
633
///
634
/// The core idea here is that if the result of a load is used in an operation,
635
/// we should load the type most conducive to that operation. For example, when
636
/// loading an integer and converting that immediately to a pointer, we should
637
/// instead directly load a pointer.
638
///
639
/// However, this routine must never change the width of a load or the number of
640
/// loads as that would introduce a semantic change. This combine is expected to
641
/// be a semantic no-op which just allows loads to more closely model the types
642
/// of their consuming operations.
643
///
644
/// Currently, we also refuse to change the precise type used for an atomic load
645
/// or a volatile load. This is debatable, and might be reasonable to change
646
/// later. However, it is risky in case some backend or other part of LLVM is
647
/// relying on the exact type loaded to select appropriate atomic operations.
648
static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
649
                                               LoadInst &Load) {
650
  // FIXME: We could probably with some care handle both volatile and ordered
651
  // atomic loads here but it isn't clear that this is important.
652
  if (!Load.isUnordered())
653
    return nullptr;
654

655
  if (Load.use_empty())
656
    return nullptr;
657

658
  // swifterror values can't be bitcasted.
659
  if (Load.getPointerOperand()->isSwiftError())
660
    return nullptr;
661

662
  // Fold away bit casts of the loaded value by loading the desired type.
663
  // Note that we should not do this for pointer<->integer casts,
664
  // because that would result in type punning.
665
  if (Load.hasOneUse()) {
666
    // Don't transform when the type is x86_amx, it makes the pass that lower
667
    // x86_amx type happy.
668
    Type *LoadTy = Load.getType();
669
    if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
670
      assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
671
      if (BC->getType()->isX86_AMXTy())
672
        return nullptr;
673
    }
674

675
    if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
676
      Type *DestTy = CastUser->getDestTy();
677
      if (CastUser->isNoopCast(IC.getDataLayout()) &&
678
          LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
679
          (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
680
        LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
681
        CastUser->replaceAllUsesWith(NewLoad);
682
        IC.eraseInstFromFunction(*CastUser);
683
        return &Load;
684
      }
685
    }
686
  }
687

688
  // FIXME: We should also canonicalize loads of vectors when their elements are
689
  // cast to other types.
690
  return nullptr;
691
}
692

693
/// Split a simple load of a struct or array into per-element loads whose
/// results are reassembled with insertvalue.
///
/// \returns the replacement produced by IC.replaceInstUsesWith, or nullptr
/// when the load is left untouched.
static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // loads here but it isn't clear that this is important.
  if (!LI.isSimple())
    return nullptr;

  Type *AggTy = LI.getType();
  if (!AggTy->isAggregateType())
    return nullptr;

  StringRef Name = LI.getName();

  // Shared handling for aggregates with exactly one element: load that
  // element directly and wrap it back into the aggregate type.
  auto UnpackSingleElement = [&](Type *EltTy) -> Instruction * {
    LoadInst *NewLoad = IC.combineLoadToNewType(LI, EltTy, ".unpack");
    NewLoad->setAAMetadata(LI.getAAMetadata());
    Value *Wrapped = IC.Builder.CreateInsertValue(PoisonValue::get(AggTy),
                                                  NewLoad, 0, Name);
    return IC.replaceInstUsesWith(LI, Wrapped);
  };

  if (auto *ST = dyn_cast<StructType>(AggTy)) {
    auto NumElements = ST->getNumElements();
    if (NumElements == 1)
      return UnpackSingleElement(ST->getTypeAtIndex(0U));

    const DataLayout &DL = IC.getDataLayout();
    auto *Layout = DL.getStructLayout(ST);

    // Don't unpack for structure with scalable vector.
    if (Layout->getSizeInBits().isScalable())
      return nullptr;

    // We don't want to break loads with padding here as we'd lose the
    // knowledge that padding exists for the rest of the pipeline.
    if (Layout->hasPadding())
      return nullptr;

    auto *Addr = LI.getPointerOperand();
    const auto Align = LI.getAlign();
    auto *IdxType = Type::getInt32Ty(AggTy->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);

    Value *Result = PoisonValue::get(AggTy);
    for (unsigned Field = 0; Field < NumElements; ++Field) {
      Value *Indices[2] = {Zero, ConstantInt::get(IdxType, Field)};
      auto *FieldPtr = IC.Builder.CreateInBoundsGEP(
          ST, Addr, ArrayRef(Indices), Name + ".elt");
      // Each field load gets the alignment implied by the original load's
      // alignment and the field's offset inside the struct.
      auto FieldAlign =
          commonAlignment(Align, Layout->getElementOffset(Field));
      auto *FieldLoad = IC.Builder.CreateAlignedLoad(
          ST->getElementType(Field), FieldPtr, FieldAlign, Name + ".unpack");
      // Propagate AA metadata. It'll still be valid on the narrowed load.
      FieldLoad->setAAMetadata(LI.getAAMetadata());
      Result = IC.Builder.CreateInsertValue(Result, FieldLoad, Field);
    }

    Result->setName(Name);
    return IC.replaceInstUsesWith(LI, Result);
  }

  if (auto *AT = dyn_cast<ArrayType>(AggTy)) {
    auto *ET = AT->getElementType();
    auto NumElements = AT->getNumElements();
    if (NumElements == 1)
      return UnpackSingleElement(ET);

    // Bail out if the array is too large. Ideally we would like to optimize
    // arrays of arbitrary size but this has a terrible impact on compile time.
    // The threshold here is chosen arbitrarily, maybe needs a little bit of
    // tuning.
    if (NumElements > IC.MaxArraySizeForCombine)
      return nullptr;

    const DataLayout &DL = IC.getDataLayout();
    TypeSize EltSize = DL.getTypeAllocSize(ET);
    const auto Align = LI.getAlign();

    auto *Addr = LI.getPointerOperand();
    auto *IdxType = Type::getInt64Ty(AggTy->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);

    Value *Result = PoisonValue::get(AggTy);
    // Running offset of the current element from the start of the array.
    TypeSize Offset = TypeSize::getZero();
    for (uint64_t Elt = 0; Elt < NumElements; ++Elt) {
      Value *Indices[2] = {Zero, ConstantInt::get(IdxType, Elt)};
      auto *EltPtr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
                                                  Name + ".elt");
      auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
      auto *EltLoad = IC.Builder.CreateAlignedLoad(ET, EltPtr, EltAlign,
                                                   Name + ".unpack");
      EltLoad->setAAMetadata(LI.getAAMetadata());
      Result = IC.Builder.CreateInsertValue(Result, EltLoad, Elt);
      Offset += EltSize;
    }

    Result->setName(Name);
    return IC.replaceInstUsesWith(LI, Result);
  }

  return nullptr;
}
801

802
// If we can determine that all possible objects pointed to by the provided
803
// pointer value are, not only dereferenceable, but also definitively less than
804
// or equal to the provided maximum size, then return true. Otherwise, return
805
// false (constant global values and allocas fall into this category).
806
//
807
// FIXME: This should probably live in ValueTracking (or similar).
808
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
809
                                     const DataLayout &DL) {
810
  SmallPtrSet<Value *, 4> Visited;
811
  SmallVector<Value *, 4> Worklist(1, V);
812

813
  do {
814
    Value *P = Worklist.pop_back_val();
815
    P = P->stripPointerCasts();
816

817
    if (!Visited.insert(P).second)
818
      continue;
819

820
    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
821
      Worklist.push_back(SI->getTrueValue());
822
      Worklist.push_back(SI->getFalseValue());
823
      continue;
824
    }
825

826
    if (PHINode *PN = dyn_cast<PHINode>(P)) {
827
      append_range(Worklist, PN->incoming_values());
828
      continue;
829
    }
830

831
    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
832
      if (GA->isInterposable())
833
        return false;
834
      Worklist.push_back(GA->getAliasee());
835
      continue;
836
    }
837

838
    // If we know how big this object is, and it is less than MaxSize, continue
839
    // searching. Otherwise, return false.
840
    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
841
      if (!AI->getAllocatedType()->isSized())
842
        return false;
843

844
      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
845
      if (!CS)
846
        return false;
847

848
      TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType());
849
      if (TS.isScalable())
850
        return false;
851
      // Make sure that, even if the multiplication below would wrap as an
852
      // uint64_t, we still do the right thing.
853
      if ((CS->getValue().zext(128) * APInt(128, TS.getFixedValue()))
854
              .ugt(MaxSize))
855
        return false;
856
      continue;
857
    }
858

859
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
860
      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
861
        return false;
862

863
      uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
864
      if (InitSize > MaxSize)
865
        return false;
866
      continue;
867
    }
868

869
    return false;
870
  } while (!Worklist.empty());
871

872
  return true;
873
}
874

875
// If we're indexing into an object of a known size, and the outer index is
876
// not a constant, but having any value but zero would lead to undefined
877
// behavior, replace it with zero.
878
//
879
// For example, if we have:
880
// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
881
// ...
882
// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
883
// ... = load i32* %arrayidx, align 4
884
// Then we know that we can replace %x in the GEP with i64 0.
885
//
886
// FIXME: We could fold any GEP index to zero that would cause UB if it were
887
// not zero. Currently, we only handle the first such index. Also, we could
888
// also search through non-zero constant indices if we kept track of the
889
// offsets those indices implied.
890
static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
891
                                     GetElementPtrInst *GEPI, Instruction *MemI,
892
                                     unsigned &Idx) {
893
  if (GEPI->getNumOperands() < 2)
894
    return false;
895

896
  // Find the first non-zero index of a GEP. If all indices are zero, return
897
  // one past the last index.
898
  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
899
    unsigned I = 1;
900
    for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
901
      Value *V = GEPI->getOperand(I);
902
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
903
        if (CI->isZero())
904
          continue;
905

906
      break;
907
    }
908

909
    return I;
910
  };
911

912
  // Skip through initial 'zero' indices, and find the corresponding pointer
913
  // type. See if the next index is not a constant.
914
  Idx = FirstNZIdx(GEPI);
915
  if (Idx == GEPI->getNumOperands())
916
    return false;
917
  if (isa<Constant>(GEPI->getOperand(Idx)))
918
    return false;
919

920
  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
921
  Type *SourceElementType = GEPI->getSourceElementType();
922
  // Size information about scalable vectors is not available, so we cannot
923
  // deduce whether indexing at n is undefined behaviour or not. Bail out.
924
  if (SourceElementType->isScalableTy())
925
    return false;
926

927
  Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
928
  if (!AllocTy || !AllocTy->isSized())
929
    return false;
930
  const DataLayout &DL = IC.getDataLayout();
931
  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
932

933
  // If there are more indices after the one we might replace with a zero, make
934
  // sure they're all non-negative. If any of them are negative, the overall
935
  // address being computed might be before the base address determined by the
936
  // first non-zero index.
937
  auto IsAllNonNegative = [&]() {
938
    for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
939
      KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
940
      if (Known.isNonNegative())
941
        continue;
942
      return false;
943
    }
944

945
    return true;
946
  };
947

948
  // FIXME: If the GEP is not inbounds, and there are extra indices after the
949
  // one we'll replace, those could cause the address computation to wrap
950
  // (rendering the IsAllNonNegative() check below insufficient). We can do
951
  // better, ignoring zero indices (and other indices we can prove small
952
  // enough not to wrap).
953
  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
954
    return false;
955

956
  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
957
  // also known to be dereferenceable.
958
  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
959
         IsAllNonNegative();
960
}
961

962
// If we're indexing into an object with a variable index for the memory
963
// access, but the object has only one element, we can assume that the index
964
// will always be zero. If we replace the GEP, return it.
965
static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
966
                                          Instruction &MemI) {
967
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
968
    unsigned Idx;
969
    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
970
      Instruction *NewGEPI = GEPI->clone();
971
      NewGEPI->setOperand(Idx,
972
        ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
973
      IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
974
      return NewGEPI;
975
    }
976
  }
977

978
  return nullptr;
979
}
980

981
/// Return true if \p SI stores to a null pointer, or to a GEP whose base
/// operand is a null pointer, in an address space where null is not a defined
/// address for the containing function.
static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
  // If null is a valid address here, a store through it is not simplifiable.
  if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
    return false;

  // Look through one GEP to its base pointer operand.
  auto *Ptr = SI.getPointerOperand();
  if (auto *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
    Ptr = GEPI->getOperand(0);

  // NullPointerIsDefined was already ruled out above with the same arguments,
  // so only the null-ness of the base needs checking.
  return isa<ConstantPointerNull>(Ptr);
}
991

992
/// Return true if a load through \p Op can be simplified because \p Op is
/// undef, is a null pointer, or is a GEP whose base operand is a null pointer.
/// The null cases apply only when null is not a defined address for the
/// containing function in the relevant address space.
static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
  // GEP with a null base.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *Base = GEP->getOperand(0);
    const bool NullBase = isa<ConstantPointerNull>(Base);
    if (NullBase &&
        !NullPointerIsDefined(LI.getFunction(), GEP->getPointerAddressSpace()))
      return true;
  }

  // Loading from undef needs no address-space check.
  if (isa<UndefValue>(Op))
    return true;

  // Direct load from null.
  return isa<ConstantPointerNull>(Op) &&
         !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace());
}
1005

1006
/// Visit a load and try, in order: generic simplification, type
/// canonicalization, alignment improvement, GEP index zeroing, aggregate
/// unpacking, forwarding of an already-available value, and finally the
/// null/undef and select-of-pointers folds for unordered loads.
Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);
  if (Value *Simplified =
          simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
    return replaceInstUsesWith(LI, Simplified);

  // Try to canonicalize the loaded type.
  if (Instruction *Canonical = combineLoadToOperationType(*this, LI))
    return Canonical;

  if (!EnableInferAlignmentPass) {
    // Attempt to improve the alignment.
    Align KnownAlign = getOrEnforceKnownAlignment(
        Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT);
    if (KnownAlign > LI.getAlign())
      LI.setAlignment(KnownAlign);
  }

  // Replace GEP indices if possible.
  if (Instruction *ZeroedGEP = replaceGEPIdxWithZero(*this, Op, LI))
    return replaceOperand(LI, 0, ZeroedGEP);

  if (Instruction *Unpacked = unpackLoadToAggregate(*this, LI))
    return Unpacked;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  bool IsLoadCSE = false;
  BatchAAResults BatchAA(*AA);
  if (Value *Available = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
    if (IsLoadCSE)
      combineMetadataForCSE(cast<LoadInst>(Available), &LI, false);

    Value *Forwarded = Builder.CreateBitOrPointerCast(
        Available, LI.getType(), LI.getName() + ".cast");
    return replaceInstUsesWith(LI, Forwarded);
  }

  // None of the following transforms are legal for volatile/ordered atomic
  // loads.  Most of them do apply for unordered atomics.
  if (!LI.isUnordered())
    return nullptr;

  // load(gep null, ...) -> unreachable
  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xforms.
  if (canSimplifyNullLoadOrGEP(LI, Op)) {
    CreateNonTerminatorUnreachable(&LI);
    return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
  }

  // The remaining folds rewrite a load whose address is a single-use select.
  if (!Op->hasOneUse())
    return nullptr;
  auto *SI = dyn_cast<SelectInst>(Op);
  if (!SI)
    return nullptr;

  Value *TrueAddr = SI->getTrueValue();
  Value *FalseAddr = SI->getFalseValue();

  // Change selects of addresses into selects of loaded values: this helps
  // alias analysis out a lot, allows many other simplifications, and exposes
  // redundancy in the code.
  //
  // Note that we cannot do the transformation unless we know that the
  // introduced loads cannot trap!  Something like this is valid as long as
  // the condition is always false: load (select bool %C, int* null, int* %G),
  // but it would not be valid if we transformed it to load from null
  // unconditionally.
  //
  // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
  Align Alignment = LI.getAlign();
  if (isSafeToLoadUnconditionally(TrueAddr, LI.getType(), Alignment, DL, SI) &&
      isSafeToLoadUnconditionally(FalseAddr, LI.getType(), Alignment, DL,
                                  SI)) {
    auto LoadArm = [&](Value *Addr) {
      return Builder.CreateLoad(LI.getType(), Addr, Addr->getName() + ".val");
    };
    // Give each new load the original load's alignment and atomicity.
    auto CopyLoadAttrs = [&](LoadInst *NewLoad) {
      NewLoad->setAlignment(Alignment);
      NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
    };

    LoadInst *V1 = LoadArm(TrueAddr);
    LoadInst *V2 = LoadArm(FalseAddr);
    assert(LI.isUnordered() && "implied by above");
    CopyLoadAttrs(V1);
    CopyLoadAttrs(V2);
    return SelectInst::Create(SI->getCondition(), V1, V2);
  }

  // When null is not a defined address, a null arm can never be the one that
  // is loaded from, so the load may use the other arm directly.
  auto NullIsNotDefined = [&]() {
    return !NullPointerIsDefined(SI->getFunction(),
                                 LI.getPointerAddressSpace());
  };

  // load (select (cond, null, P)) -> load P
  if (isa<ConstantPointerNull>(TrueAddr) && NullIsNotDefined())
    return replaceOperand(LI, 0, FalseAddr);

  // load (select (cond, P, null)) -> load P
  if (isa<ConstantPointerNull>(FalseAddr) && NullIsNotDefined())
    return replaceOperand(LI, 0, TrueAddr);

  return nullptr;
}
1103

1104
/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1105
///
1106
/// \returns underlying value that was "cast", or nullptr otherwise.
1107
///
1108
/// For example, if we have:
1109
///
1110
///     %E0 = extractelement <2 x double> %U, i32 0
1111
///     %V0 = insertvalue [2 x double] undef, double %E0, 0
1112
///     %E1 = extractelement <2 x double> %U, i32 1
1113
///     %V1 = insertvalue [2 x double] %V0, double %E1, 1
1114
///
1115
/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1116
/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1117
/// Note that %U may contain non-undef values where %V1 has undef.
1118
static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
1119
  Value *U = nullptr;
1120
  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1121
    auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1122
    if (!E)
1123
      return nullptr;
1124
    auto *W = E->getVectorOperand();
1125
    if (!U)
1126
      U = W;
1127
    else if (U != W)
1128
      return nullptr;
1129
    auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1130
    if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1131
      return nullptr;
1132
    V = IV->getAggregateOperand();
1133
  }
1134
  if (!match(V, m_Undef()) || !U)
1135
    return nullptr;
1136

1137
  auto *UT = cast<VectorType>(U->getType());
1138
  auto *VT = V->getType();
1139
  // Check that types UT and VT are bitwise isomorphic.
1140
  const auto &DL = IC.getDataLayout();
1141
  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1142
    return nullptr;
1143
  }
1144
  if (auto *AT = dyn_cast<ArrayType>(VT)) {
1145
    if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1146
      return nullptr;
1147
  } else {
1148
    auto *ST = cast<StructType>(VT);
1149
    if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1150
      return nullptr;
1151
    for (const auto *EltT : ST->elements()) {
1152
      if (EltT != UT->getElementType())
1153
        return nullptr;
1154
    }
1155
  }
1156
  return U;
1157
}
1158

1159
/// Combine stores to match the type of value being stored.
1160
///
1161
/// The core idea here is that the memory does not have any intrinsic type and
1162
/// where we can we should match the type of a store to the type of value being
1163
/// stored.
1164
///
1165
/// However, this routine must never change the width of a store or the number of
1166
/// stores as that would introduce a semantic change. This combine is expected to
1167
/// be a semantic no-op which just allows stores to more closely model the types
1168
/// of their incoming values.
1169
///
1170
/// Currently, we also refuse to change the precise type used for an atomic or
1171
/// volatile store. This is debatable, and might be reasonable to change later.
1172
/// However, it is risky in case some backend or other part of LLVM is relying
1173
/// on the exact type stored to select appropriate atomic operations.
1174
///
1175
/// \returns true if the store was successfully combined away. This indicates
1176
/// the caller must erase the store instruction. We have to let the caller erase
1177
/// the store instruction as otherwise there is no way to signal whether it was
1178
/// combined or not: IC.EraseInstFromFunction returns a null pointer.
1179
static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
1180
  // FIXME: We could probably with some care handle both volatile and ordered
1181
  // atomic stores here but it isn't clear that this is important.
1182
  if (!SI.isUnordered())
1183
    return false;
1184

1185
  // swifterror values can't be bitcasted.
1186
  if (SI.getPointerOperand()->isSwiftError())
1187
    return false;
1188

1189
  Value *V = SI.getValueOperand();
1190

1191
  // Fold away bit casts of the stored value by storing the original type.
1192
  if (auto *BC = dyn_cast<BitCastInst>(V)) {
1193
    assert(!BC->getType()->isX86_AMXTy() &&
1194
           "store to x86_amx* should not happen!");
1195
    V = BC->getOperand(0);
1196
    // Don't transform when the type is x86_amx, it makes the pass that lower
1197
    // x86_amx type happy.
1198
    if (V->getType()->isX86_AMXTy())
1199
      return false;
1200
    if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1201
      combineStoreToNewValue(IC, SI, V);
1202
      return true;
1203
    }
1204
  }
1205

1206
  if (Value *U = likeBitCastFromVector(IC, V))
1207
    if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1208
      combineStoreToNewValue(IC, SI, U);
1209
      return true;
1210
    }
1211

1212
  // FIXME: We should also canonicalize stores of vectors when their elements
1213
  // are cast to other types.
1214
  return false;
1215
}
1216

1217
/// Split a simple store of a struct or array value into per-element stores of
/// values obtained with extractvalue.
///
/// \returns true if replacement stores were emitted; the original store is
/// not erased here.
static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // stores here but it isn't clear that this is important.
  if (!SI.isSimple())
    return false;

  Value *V = SI.getValueOperand();
  Type *AggTy = V->getType();
  if (!AggTy->isAggregateType())
    return false;

  // Shared handling for aggregates with exactly one element: store that
  // element directly.
  auto StoreSingleElement = [&]() {
    Value *Only = IC.Builder.CreateExtractValue(V, 0);
    combineStoreToNewValue(IC, SI, Only);
    return true;
  };

  if (auto *ST = dyn_cast<StructType>(AggTy)) {
    unsigned Count = ST->getNumElements();
    if (Count == 1)
      return StoreSingleElement();

    const DataLayout &DL = IC.getDataLayout();
    auto *Layout = DL.getStructLayout(ST);

    // Don't unpack for structure with scalable vector.
    if (Layout->getSizeInBits().isScalable())
      return false;

    // We don't want to break stores with padding here as we'd lose the
    // knowledge that padding exists for the rest of the pipeline.
    if (Layout->hasPadding())
      return false;

    const auto Align = SI.getAlign();

    // Names for the extracted values and the per-field addresses.
    SmallString<16> EltName = V->getName();
    EltName += ".elt";
    auto *Addr = SI.getPointerOperand();
    SmallString<16> AddrName = Addr->getName();
    AddrName += ".repack";

    auto *IdxType = Type::getInt32Ty(ST->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);
    for (unsigned Field = 0; Field < Count; ++Field) {
      Value *Indices[2] = {Zero, ConstantInt::get(IdxType, Field)};
      auto *FieldPtr =
          IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices), AddrName);
      auto *FieldVal = IC.Builder.CreateExtractValue(V, Field, EltName);
      // Alignment implied by the original store's alignment and the field's
      // offset inside the struct.
      auto FieldAlign =
          commonAlignment(Align, Layout->getElementOffset(Field));
      llvm::Instruction *NewStore =
          IC.Builder.CreateAlignedStore(FieldVal, FieldPtr, FieldAlign);
      NewStore->setAAMetadata(SI.getAAMetadata());
    }

    return true;
  }

  if (auto *AT = dyn_cast<ArrayType>(AggTy)) {
    auto NumElements = AT->getNumElements();
    if (NumElements == 1)
      return StoreSingleElement();

    // Bail out if the array is too large. Ideally we would like to optimize
    // arrays of arbitrary size but this has a terrible impact on compile time.
    // The threshold here is chosen arbitrarily, maybe needs a little bit of
    // tuning.
    if (NumElements > IC.MaxArraySizeForCombine)
      return false;

    const DataLayout &DL = IC.getDataLayout();
    TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
    const auto Align = SI.getAlign();

    // Names for the extracted values and the per-element addresses.
    SmallString<16> EltName = V->getName();
    EltName += ".elt";
    auto *Addr = SI.getPointerOperand();
    SmallString<16> AddrName = Addr->getName();
    AddrName += ".repack";

    auto *IdxType = Type::getInt64Ty(AggTy->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);

    // Running offset of the current element from the start of the array.
    TypeSize Offset = TypeSize::getZero();
    for (uint64_t Elt = 0; Elt < NumElements; ++Elt) {
      Value *Indices[2] = {Zero, ConstantInt::get(IdxType, Elt)};
      auto *EltPtr =
          IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
      auto *EltVal = IC.Builder.CreateExtractValue(V, Elt, EltName);
      auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
      Instruction *NewStore =
          IC.Builder.CreateAlignedStore(EltVal, EltPtr, EltAlign);
      NewStore->setAAMetadata(SI.getAAMetadata());
      Offset += EltSize;
    }

    return true;
  }

  return false;
}
1325

1326
/// equivalentAddressValues - Test if A and B will obviously have the same
1327
/// value. This includes recognizing that %t0 and %t1 will have the same
1328
/// value in code like this:
1329
///   %t0 = getelementptr \@a, 0, 3
1330
///   store i32 0, i32* %t0
1331
///   %t1 = getelementptr \@a, 0, 3
1332
///   %t2 = load i32* %t1
1333
///
1334
static bool equivalentAddressValues(Value *A, Value *B) {
1335
  // Test if the values are trivially equivalent.
1336
  if (A == B) return true;
1337

1338
  // Test if the values come form identical arithmetic instructions.
1339
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1340
  // its only used to compare two uses within the same basic block, which
1341
  // means that they'll always either have the same value or one of them
1342
  // will have an undefined value.
1343
  if (isa<BinaryOperator>(A) ||
1344
      isa<CastInst>(A) ||
1345
      isa<PHINode>(A) ||
1346
      isa<GetElementPtrInst>(A))
1347
    if (Instruction *BI = dyn_cast<Instruction>(B))
1348
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1349
        return true;
1350

1351
  // Otherwise they may not be equivalent.
1352
  return false;
1353
}
1354

1355
/// Visit a store and try, in order: type canonicalization, alignment
/// improvement, aggregate unpacking, GEP index zeroing, and then (for
/// unordered stores only) removal of dead or no-op stores and handling of
/// stores to null/undef.
Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Try to canonicalize the stored type.
  if (combineStoreToValueType(*this, SI))
    return eraseInstFromFunction(SI);

  if (!EnableInferAlignmentPass) {
    // Attempt to improve the alignment.
    const Align KnownAlign = getOrEnforceKnownAlignment(
        Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
    if (KnownAlign > SI.getAlign())
      SI.setAlignment(KnownAlign);
  }

  // Try to split a store of an aggregate into per-element stores.
  if (unpackStoreToAggregate(*this, SI))
    return eraseInstFromFunction(SI);

  // Replace GEP indices if possible.
  if (Instruction *ZeroedGEP = replaceGEPIdxWithZero(*this, Ptr, SI))
    return replaceOperand(SI, 1, ZeroedGEP);

  // Don't hack volatile/ordered stores.
  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
  if (!SI.isUnordered())
    return nullptr;

  // If the destination is an alloca (directly, or through a GEP) whose only
  // use leads to this store, zapify the store, making the alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return eraseInstFromFunction(SI);
    if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      Value *Base = GEP->getOperand(0);
      if (isa<AllocaInst>(Base) && Base->hasOneUse())
        return eraseInstFromFunction(SI);
    }
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (!isModSet(AA->getModRefInfoMask(Ptr)))
    return eraseInstFromFunction(SI);

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
  BasicBlock *StoreBB = SI.getParent();
  BasicBlock::iterator Scan(SI);
  // At most this many non-debug instructions before the store are inspected.
  unsigned Budget = 6;
  while (Scan != StoreBB->begin() && Budget) {
    --Scan;
    // Don't count debug info directives, lest they affect codegen.
    if (Scan->isDebugOrPseudoInst())
      continue;
    --Budget;

    Instruction &Prev = *Scan;

    if (auto *PrevSI = dyn_cast<StoreInst>(&Prev)) {
      // Is the earlier store unordered, to the same location, and of the same
      // value type? Then it is overwritten by this store.
      if (PrevSI->isUnordered() &&
          equivalentAddressValues(PrevSI->getOperand(1), Ptr) &&
          PrevSI->getValueOperand()->getType() == Val->getType()) {
        ++NumDeadStore;
        // Manually add back the original store to the worklist now, so it will
        // be processed after the operands of the removed store, as this may
        // expose additional DSE opportunities.
        Worklist.push(&SI);
        eraseInstFromFunction(*PrevSI);
        return nullptr;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if this store writes back
    // the value that load produced to the address it was loaded from, then
    // *this* store is dead (X = load P; store X -> P).
    if (auto *PrevLI = dyn_cast<LoadInst>(&Prev)) {
      if (PrevLI == Val && equivalentAddressValues(PrevLI->getOperand(0), Ptr)) {
        assert(SI.isUnordered() && "can't eliminate ordering operation");
        return eraseInstFromFunction(SI);
      }

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads, throws or things that can modify memory.
    if (Prev.mayWriteToMemory() || Prev.mayReadFromMemory() || Prev.mayThrow())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
  if (canSimplifyNullStoreOrGEP(SI)) {
    // Keep the store itself; only canonicalize the stored value to poison.
    if (isa<PoisonValue>(Val))
      return nullptr; // Do not modify these!
    return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
  }

  // This is a non-terminator unreachable marker. Don't remove it.
  if (isa<UndefValue>(Ptr)) {
    // Remove guaranteed-to-transfer instructions before the marker.
    if (removeInstructionsBeforeUnreachable(SI))
      return &SI;

    // Remove all instructions after the marker and handle dead blocks this
    // implies.
    SmallVector<BasicBlock *> DeadBlockCandidates;
    handleUnreachableFrom(SI.getNextNode(), DeadBlockCandidates);
    handlePotentiallyDeadBlocks(DeadBlockCandidates);
    return nullptr;
  }

  // store undef, Ptr -> noop
  // FIXME: This is technically incorrect because it might overwrite a poison
  // value. Change to PoisonValue once #52930 is resolved.
  if (isa<UndefValue>(Val))
    return eraseInstFromFunction(SI);

  return nullptr;
}
1482

1483
/// Try to transform:
1484
///   if () { *P = v1; } else { *P = v2 }
1485
/// or:
1486
///   *P = v1; if () { *P = v2; }
1487
/// into a phi node with a store in the successor.
1488
bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
1489
  if (!SI.isUnordered())
1490
    return false; // This code has not been audited for volatile/ordered case.
1491

1492
  // Check if the successor block has exactly 2 incoming edges.
1493
  BasicBlock *StoreBB = SI.getParent();
1494
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1495
  if (!DestBB->hasNPredecessors(2))
1496
    return false;
1497

1498
  // Capture the other block (the block that doesn't contain our store).
1499
  pred_iterator PredIter = pred_begin(DestBB);
1500
  if (*PredIter == StoreBB)
1501
    ++PredIter;
1502
  BasicBlock *OtherBB = *PredIter;
1503

1504
  // Bail out if all of the relevant blocks aren't distinct. This can happen,
1505
  // for example, if SI is in an infinite loop.
1506
  if (StoreBB == DestBB || OtherBB == DestBB)
1507
    return false;
1508

1509
  // Verify that the other block ends in a branch and is not otherwise empty.
1510
  BasicBlock::iterator BBI(OtherBB->getTerminator());
1511
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1512
  if (!OtherBr || BBI == OtherBB->begin())
1513
    return false;
1514

1515
  auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
1516
    if (!OtherStore ||
1517
        OtherStore->getPointerOperand() != SI.getPointerOperand())
1518
      return false;
1519

1520
    auto *SIVTy = SI.getValueOperand()->getType();
1521
    auto *OSVTy = OtherStore->getValueOperand()->getType();
1522
    return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
1523
           SI.hasSameSpecialState(OtherStore);
1524
  };
1525

1526
  // If the other block ends in an unconditional branch, check for the 'if then
1527
  // else' case. There is an instruction before the branch.
1528
  StoreInst *OtherStore = nullptr;
1529
  if (OtherBr->isUnconditional()) {
1530
    --BBI;
1531
    // Skip over debugging info and pseudo probes.
1532
    while (BBI->isDebugOrPseudoInst()) {
1533
      if (BBI==OtherBB->begin())
1534
        return false;
1535
      --BBI;
1536
    }
1537
    // If this isn't a store, isn't a store to the same location, or is not the
1538
    // right kind of store, bail out.
1539
    OtherStore = dyn_cast<StoreInst>(BBI);
1540
    if (!OtherStoreIsMergeable(OtherStore))
1541
      return false;
1542
  } else {
1543
    // Otherwise, the other block ended with a conditional branch. If one of the
1544
    // destinations is StoreBB, then we have the if/then case.
1545
    if (OtherBr->getSuccessor(0) != StoreBB &&
1546
        OtherBr->getSuccessor(1) != StoreBB)
1547
      return false;
1548

1549
    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1550
    // if/then triangle. See if there is a store to the same ptr as SI that
1551
    // lives in OtherBB.
1552
    for (;; --BBI) {
1553
      // Check to see if we find the matching store.
1554
      OtherStore = dyn_cast<StoreInst>(BBI);
1555
      if (OtherStoreIsMergeable(OtherStore))
1556
        break;
1557

1558
      // If we find something that may be using or overwriting the stored
1559
      // value, or if we run out of instructions, we can't do the transform.
1560
      if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1561
          BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1562
        return false;
1563
    }
1564

1565
    // In order to eliminate the store in OtherBr, we have to make sure nothing
1566
    // reads or overwrites the stored value in StoreBB.
1567
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1568
      // FIXME: This should really be AA driven.
1569
      if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1570
        return false;
1571
    }
1572
  }
1573

1574
  // Insert a PHI node now if we need it.
1575
  Value *MergedVal = OtherStore->getValueOperand();
1576
  // The debug locations of the original instructions might differ. Merge them.
1577
  DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1578
                                                     OtherStore->getDebugLoc());
1579
  if (MergedVal != SI.getValueOperand()) {
1580
    PHINode *PN =
1581
        PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
1582
    PN->addIncoming(SI.getValueOperand(), SI.getParent());
1583
    Builder.SetInsertPoint(OtherStore);
1584
    PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
1585
                    OtherBB);
1586
    MergedVal = InsertNewInstBefore(PN, DestBB->begin());
1587
    PN->setDebugLoc(MergedLoc);
1588
  }
1589

1590
  // Advance to a place where it is safe to insert the new store and insert it.
1591
  BBI = DestBB->getFirstInsertionPt();
1592
  StoreInst *NewSI =
1593
      new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
1594
                    SI.getOrdering(), SI.getSyncScopeID());
1595
  InsertNewInstBefore(NewSI, BBI);
1596
  NewSI->setDebugLoc(MergedLoc);
1597
  NewSI->mergeDIAssignID({&SI, OtherStore});
1598

1599
  // If the two stores had AA tags, merge them.
1600
  AAMDNodes AATags = SI.getAAMetadata();
1601
  if (AATags)
1602
    NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
1603

1604
  // Nuke the old stores.
1605
  eraseInstFromFunction(SI);
1606
  eraseInstFromFunction(*OtherStore);
1607
  return true;
1608
}
1609

1610
Product

Resources

Company