Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
213799 views
1
//===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file implements explicit unrolling for VPlans.
11
///
12
//===----------------------------------------------------------------------===//
13
14
#include "VPRecipeBuilder.h"
15
#include "VPlan.h"
16
#include "VPlanAnalysis.h"
17
#include "VPlanCFG.h"
18
#include "VPlanHelpers.h"
19
#include "VPlanPatternMatch.h"
20
#include "VPlanTransforms.h"
21
#include "VPlanUtils.h"
22
#include "llvm/ADT/PostOrderIterator.h"
23
#include "llvm/ADT/STLExtras.h"
24
#include "llvm/ADT/ScopeExit.h"
25
#include "llvm/Analysis/IVDescriptors.h"
26
#include "llvm/IR/Intrinsics.h"
27
28
using namespace llvm;
29
using namespace llvm::VPlanPatternMatch;
30
31
namespace {
32
33
/// Helper to hold state needed for unrolling. It holds the Plan to unroll by
34
/// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
35
/// the unrolling transformation, where the original VPValues are retained for
36
/// part zero.
37
class UnrollState {
38
/// Plan to unroll.
39
VPlan &Plan;
40
/// Unroll factor to unroll by.
41
const unsigned UF;
42
/// Analysis for types.
43
VPTypeAnalysis TypeInfo;
44
45
/// Unrolling may create recipes that should not be unrolled themselves.
46
/// Those are tracked in ToSkip.
47
SmallPtrSet<VPRecipeBase *, 8> ToSkip;
48
49
// Associate with each VPValue of part 0 its unrolled instances of parts 1,
50
// ..., UF-1.
51
DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;
52
53
/// Unroll replicate region \p VPR by cloning the region UF - 1 times.
54
void unrollReplicateRegionByUF(VPRegionBlock *VPR);
55
56
/// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
57
/// all parts.
58
void unrollRecipeByUF(VPRecipeBase &R);
59
60
/// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
61
/// depends on the concrete header phi. Inserts newly created recipes at \p
62
/// InsertPtForPhi.
63
void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
64
VPBasicBlock::iterator InsertPtForPhi);
65
66
/// Unroll a widen induction recipe \p IV. This introduces recipes to compute
67
/// the induction steps for each part.
68
void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV,
69
VPBasicBlock::iterator InsertPtForPhi);
70
71
VPValue *getConstantVPV(unsigned Part) {
72
Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
73
return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
74
}
75
76
public:
77
UnrollState(VPlan &Plan, unsigned UF, LLVMContext &Ctx)
78
: Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {}
79
80
void unrollBlock(VPBlockBase *VPB);
81
82
VPValue *getValueForPart(VPValue *V, unsigned Part) {
83
if (Part == 0 || V->isLiveIn())
84
return V;
85
assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
86
"accessed value does not exist");
87
return VPV2Parts[V][Part - 1];
88
}
89
90
/// Given a single original recipe \p OrigR (of part zero), and its copy \p
91
/// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
92
/// corresponding VPValue defined by \p CopyR.
93
void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
94
unsigned Part) {
95
for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
96
auto Ins = VPV2Parts.insert({VPV, {}});
97
assert(Ins.first->second.size() == Part - 1 && "earlier parts not set");
98
Ins.first->second.push_back(CopyR->getVPValue(Idx));
99
}
100
}
101
102
/// Given a uniform recipe \p R, add it for all parts.
103
void addUniformForAllParts(VPSingleDefRecipe *R) {
104
auto Ins = VPV2Parts.insert({R, {}});
105
assert(Ins.second && "uniform value already added");
106
for (unsigned Part = 0; Part != UF; ++Part)
107
Ins.first->second.push_back(R);
108
}
109
110
bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }
111
112
/// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
113
/// \p P.
114
void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
115
auto *Op = R->getOperand(OpIdx);
116
R->setOperand(OpIdx, getValueForPart(Op, Part));
117
}
118
119
/// Update \p R's operands with their corresponding VPValues for part \p P.
120
void remapOperands(VPRecipeBase *R, unsigned Part) {
121
for (const auto &[OpIdx, Op] : enumerate(R->operands()))
122
R->setOperand(OpIdx, getValueForPart(Op, Part));
123
}
124
};
125
} // namespace
126
127
/// Clone the replicate region \p VPR once per part in [1, UF), placing each
/// clone before the single successor of \p VPR. The recipes of every clone are
/// remapped to the operands of their part and registered as the per-part
/// copies of the corresponding recipes in \p VPR.
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
  VPBlockBase *Successor = VPR->getSingleSuccessor();
  for (unsigned Part = 1; Part != UF; ++Part) {
    auto *ClonedRegion = VPR->clone();
    VPBlockUtils::insertBlockBefore(ClonedRegion, Successor);

    // Walk the clone and the original in lock-step: both are traversed in the
    // same order, so blocks and recipes are paired up positionally.
    auto ClonedBlocks = vp_depth_first_shallow(ClonedRegion->getEntry());
    auto OrigBlocks = vp_depth_first_shallow(VPR->getEntry());
    for (const auto &[ClonedVPBB, OrigVPBB] :
         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(ClonedBlocks),
             VPBlockUtils::blocksOnly<VPBasicBlock>(OrigBlocks))) {
      for (const auto &[ClonedR, OrigR] : zip(*ClonedVPBB, *OrigVPBB)) {
        remapOperands(&ClonedR, Part);
        // Scalar IV steps additionally receive the part as an operand.
        if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&ClonedR))
          Steps->addOperand(getConstantVPV(Part));

        addRecipeForPart(&OrigR, &ClonedR, Part);
      }
    }
  }
}
149
150
void UnrollState::unrollWidenInductionByUF(
151
VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
152
VPBasicBlock *PH = cast<VPBasicBlock>(
153
IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
154
Type *IVTy = TypeInfo.inferScalarType(IV);
155
auto &ID = IV->getInductionDescriptor();
156
VPIRFlags Flags;
157
if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
158
Flags = ID.getInductionBinOp()->getFastMathFlags();
159
160
VPValue *ScalarStep = IV->getStepValue();
161
VPBuilder Builder(PH);
162
VPInstruction *VectorStep = Builder.createNaryOp(
163
VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, IVTy, Flags,
164
IV->getDebugLoc());
165
166
ToSkip.insert(VectorStep);
167
168
// Now create recipes to compute the induction steps for part 1 .. UF. Part 0
169
// remains the header phi. Parts > 0 are computed by adding Step to the
170
// previous part. The header phi recipe will get 2 new operands: the step
171
// value for a single part and the last part, used to compute the backedge
172
// value during VPWidenIntOrFpInductionRecipe::execute. %Part.0 =
173
// VPWidenIntOrFpInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3
174
// %Part.1 = %Part.0 + %VectorStep
175
// %Part.2 = %Part.1 + %VectorStep
176
// %Part.3 = %Part.2 + %VectorStep
177
//
178
// The newly added recipes are added to ToSkip to avoid interleaving them
179
// again.
180
VPValue *Prev = IV;
181
Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
182
unsigned AddOpc =
183
IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add;
184
for (unsigned Part = 1; Part != UF; ++Part) {
185
std::string Name =
186
Part > 1 ? "step.add." + std::to_string(Part) : "step.add";
187
188
VPInstruction *Add = Builder.createNaryOp(AddOpc,
189
{
190
Prev,
191
VectorStep,
192
},
193
Flags, IV->getDebugLoc(), Name);
194
ToSkip.insert(Add);
195
addRecipeForPart(IV, Add, Part);
196
Prev = Add;
197
}
198
IV->addOperand(VectorStep);
199
IV->addOperand(Prev);
200
}
201
202
void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
203
VPBasicBlock::iterator InsertPtForPhi) {
204
// First-order recurrences pass a single vector or scalar through their header
205
// phis, irrespective of interleaving.
206
if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
207
return;
208
209
// Generate step vectors for each unrolled part.
210
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
211
unrollWidenInductionByUF(IV, InsertPtForPhi);
212
return;
213
}
214
215
auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
216
if (RdxPhi && RdxPhi->isOrdered())
217
return;
218
219
auto InsertPt = std::next(R->getIterator());
220
for (unsigned Part = 1; Part != UF; ++Part) {
221
VPRecipeBase *Copy = R->clone();
222
Copy->insertBefore(*R->getParent(), InsertPt);
223
addRecipeForPart(R, Copy, Part);
224
if (isa<VPWidenPointerInductionRecipe>(R)) {
225
Copy->addOperand(R);
226
Copy->addOperand(getConstantVPV(Part));
227
} else if (RdxPhi) {
228
// If the start value is a ReductionStartVector, use the identity value
229
// (second operand) for unrolled parts. If the scaling factor is > 1,
230
// create a new ReductionStartVector with the scale factor and both
231
// operands set to the identity value.
232
if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
233
assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
234
"unexpected start VPInstruction");
235
if (Part != 1)
236
continue;
237
VPValue *StartV;
238
if (match(VPI->getOperand(2), m_SpecificInt(1))) {
239
StartV = VPI->getOperand(1);
240
} else {
241
auto *C = VPI->clone();
242
C->setOperand(0, C->getOperand(1));
243
C->insertAfter(VPI);
244
StartV = C;
245
}
246
for (unsigned Part = 1; Part != UF; ++Part)
247
VPV2Parts[VPI][Part - 1] = StartV;
248
}
249
Copy->addOperand(getConstantVPV(Part));
250
} else {
251
assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
252
"unexpected header phi recipe not needing unrolled part");
253
}
254
}
255
}
256
257
/// Handle non-header-phi recipes.
258
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
259
if (match(&R, m_BranchOnCond(m_VPValue())) ||
260
match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
261
return;
262
263
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
264
if (vputils::onlyFirstPartUsed(VPI)) {
265
addUniformForAllParts(VPI);
266
return;
267
}
268
}
269
if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
270
if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
271
RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
272
// Stores to an invariant address only need to store the last part.
273
remapOperands(&R, UF - 1);
274
return;
275
}
276
if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
277
if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
278
addUniformForAllParts(RepR);
279
return;
280
}
281
}
282
}
283
284
// Unroll non-uniform recipes.
285
auto InsertPt = std::next(R.getIterator());
286
VPBasicBlock &VPBB = *R.getParent();
287
for (unsigned Part = 1; Part != UF; ++Part) {
288
VPRecipeBase *Copy = R.clone();
289
Copy->insertBefore(VPBB, InsertPt);
290
addRecipeForPart(&R, Copy, Part);
291
292
VPValue *Op;
293
if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
294
m_VPValue(), m_VPValue(Op)))) {
295
Copy->setOperand(0, getValueForPart(Op, Part - 1));
296
Copy->setOperand(1, getValueForPart(Op, Part));
297
continue;
298
}
299
if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
300
auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
301
if (Phi && Phi->isOrdered()) {
302
auto &Parts = VPV2Parts[Phi];
303
if (Part == 1) {
304
Parts.clear();
305
Parts.push_back(Red);
306
}
307
Parts.push_back(Copy->getVPSingleValue());
308
Phi->setOperand(1, Copy->getVPSingleValue());
309
}
310
}
311
remapOperands(Copy, Part);
312
313
// Add operand indicating the part to generate code for, to recipes still
314
// requiring it.
315
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
316
VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
317
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
318
m_VPValue())))
319
Copy->addOperand(getConstantVPV(Part));
320
321
if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
322
Copy->setOperand(0, R.getOperand(0));
323
}
324
}
325
326
/// Unroll \p VPB. Replicate regions are cloned as a whole, other regions are
/// unrolled block by block, and for a VPBasicBlock each of its recipes is
/// unrolled.
void UnrollState::unrollBlock(VPBlockBase *VPB) {
  if (auto *Region = dyn_cast<VPRegionBlock>(VPB)) {
    if (Region->isReplicator()) {
      unrollReplicateRegionByUF(Region);
      return;
    }

    // Visit the blocks of the region in RPO so that definitions are handled
    // before their uses in other blocks.
    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
        RPOT(Region->getEntry());
    for (VPBlockBase *Child : RPOT)
      unrollBlock(Child);
    return;
  }

  // Not a region, so VPB must be a VPBasicBlock: unroll its recipes.
  auto *VPBB = cast<VPBasicBlock>(VPB);
  auto InsertPtForPhi = VPBB->getFirstNonPhi();
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (isa<VPIRInstruction>(&R) || ToSkip.contains(&R))
      continue;

    // AnyOf, FirstActiveLane and the Compute*Result instructions combine all
    // parts into a single value, so they get the values of all parts as
    // operands.
    VPValue *PerPartOp = nullptr;
    const bool CombinesAllParts =
        match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(PerPartOp))) ||
        match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
                      m_VPValue(PerPartOp))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(PerPartOp))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
                      m_VPValue(), m_VPValue(PerPartOp))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(),
                      m_VPValue(PerPartOp)));
    if (CombinesAllParts) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(PerPartOp, Part));
      continue;
    }

    VPValue *ExtractSrc = nullptr;
    if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>(
                      m_VPValue(ExtractSrc))) ||
        match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
                      m_VPValue(ExtractSrc)))) {
      addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
      if (!Plan.hasScalarVFOnly()) {
        // With a vector VF the extract reads from the last part.
        remapOperands(&R, UF - 1);
        continue;
      }
      // With VF = 1, extracting from the end means taking the last or the
      // penultimate scalar part (UF-1 or UF-2); the extract becomes redundant.
      auto *Extract = cast<VPInstruction>(&R);
      const unsigned PartsFromEnd =
          Extract->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
      Extract->replaceAllUsesWith(
          getValueForPart(ExtractSrc, UF - PartsFromEnd));
      R.eraseFromParent();
      continue;
    }

    if (auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
        SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
      addUniformForAllParts(SingleDef);
      continue;
    }

    if (auto *HeaderPhi = dyn_cast<VPHeaderPHIRecipe>(&R))
      unrollHeaderPHIByUF(HeaderPhi, InsertPtForPhi);
    else
      unrollRecipeByUF(R);
  }
}
400
401
/// Unroll \p Plan by \p UF, which must be positive. The unroll factor is
/// always recorded in the plan; for a UF of 1 nothing else happens apart from
/// the final cleanup of single-operand CanonicalIVIncrementForPart
/// instructions, which runs on every exit from this function.
void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
  assert(UF > 0 && "Unroll factor must be positive");
  Plan.setUF(UF);

  // Registered before the early exit below so that it also runs for UF == 1.
  auto Cleanup = make_scope_exit([&Plan]() {
    auto AllBlocks = vp_depth_first_deep(Plan.getEntry());
    // Drop recipes that are redundant after unrolling: a
    // CanonicalIVIncrementForPart with a single operand is replaced by that
    // operand.
    for (VPBasicBlock *VPBB :
         VPBlockUtils::blocksOnly<VPBasicBlock>(AllBlocks)) {
      for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
        auto *Inc = dyn_cast<VPInstruction>(&R);
        if (!Inc ||
            Inc->getOpcode() != VPInstruction::CanonicalIVIncrementForPart ||
            Inc->getNumOperands() != 1)
          continue;
        Inc->replaceAllUsesWith(Inc->getOperand(0));
        Inc->eraseFromParent();
      }
    }
  });

  if (UF == 1)
    return;

  UnrollState Unroller(Plan, UF, Ctx);

  // Unroll the recipes of all blocks reachable from the plan's entry. This
  // covers the vector preheader and the middle block as well, since they may
  // set up or post-process per-part values.
  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());
  for (VPBlockBase *Block : RPOT)
    Unroller.unrollBlock(Block);

  // Update the operands (and thus the backedge values) of the header phis
  // cloned during unrolling. The clones directly follow their part-0 phi, so
  // Part counts upwards over a run of clones and restarts at 1 whenever a
  // part-0 phi is reached.
  auto *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  unsigned Part = 1;
  for (VPRecipeBase &Phi : Header->phis()) {
    if (isa<VPFirstOrderRecurrencePHIRecipe>(&Phi)) {
      // The second operand of a fixed-order recurrence phi feeds the spliced
      // value across the backedge; it must refer to the last part of it.
      Unroller.remapOperand(&Phi, 1, UF - 1);
    } else if (Unroller.contains(Phi.getVPSingleValue()) ||
               isa<VPWidenPointerInductionRecipe>(&Phi)) {
      Part = 1;
    } else {
      Unroller.remapOperands(&Phi, Part);
      ++Part;
    }
  }

  VPlanTransforms::removeDeadRecipes(Plan);
}
457
458
/// Create a single-scalar clone of \p RepR for lane \p Lane.
459
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
460
Type *IdxTy, VPReplicateRecipe *RepR,
461
VPLane Lane) {
462
// Collect the operands at Lane, creating extracts as needed.
463
SmallVector<VPValue *> NewOps;
464
for (VPValue *Op : RepR->operands()) {
465
if (vputils::isSingleScalar(Op)) {
466
NewOps.push_back(Op);
467
continue;
468
}
469
if (Lane.getKind() == VPLane::Kind::ScalableLast) {
470
NewOps.push_back(
471
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
472
continue;
473
}
474
// Look through buildvector to avoid unnecessary extracts.
475
if (match(Op, m_BuildVector())) {
476
NewOps.push_back(
477
cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
478
continue;
479
}
480
VPValue *Idx =
481
Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
482
VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
483
NewOps.push_back(Ext);
484
}
485
486
auto *New =
487
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
488
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
489
New->transferFlags(*RepR);
490
New->insertBefore(RepR);
491
return New;
492
}
493
494
/// Replace every VPReplicateRecipe that is not a single scalar, in the blocks
/// outside the loop region and directly inside the top-level loop region of
/// \p Plan, by single-scalar clones for the individual lanes of \p VF.
void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
  Type *IdxTy = IntegerType::get(
      Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);
  const unsigned NumLanes = VF.getKnownMinValue();

  // The blocks to process: all VPBBs outside the loop region plus those
  // directly inside the top-level loop region.
  auto OuterVPBBs = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getEntry()));
  auto LoopVPBBs = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
  auto Worklist = concat<VPBasicBlock *>(OuterVPBBs, LoopVPBBs);

  for (VPBasicBlock *VPBB : Worklist) {
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || RepR->isSingleScalar())
        continue;

      VPBuilder Builder(RepR);

      // Without users no vector result has to be assembled.
      if (RepR->getNumUsers() == 0) {
        const bool IsInvariantAddrStore =
            isa<StoreInst>(RepR->getUnderlyingInstr()) &&
            vputils::isSingleScalar(RepR->getOperand(1));
        if (IsInvariantAddrStore) {
          // A store to an invariant address only has to store the last lane.
          cloneForLane(Plan, Builder, IdxTy, RepR,
                       VPLane::getLastLaneForVF(VF));
        } else {
          // Otherwise emit a single-scalar clone for each lane.
          for (unsigned LaneIdx = 0; LaneIdx != NumLanes; ++LaneIdx)
            cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(LaneIdx));
        }
        RepR->eraseFromParent();
        continue;
      }

      // RepR has users: emit a single-scalar clone for each lane and keep the
      // per-lane definitions.
      SmallVector<VPValue *> PerLane;
      for (unsigned LaneIdx = 0; LaneIdx != NumLanes; ++LaneIdx)
        PerLane.push_back(
            cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(LaneIdx)));

      // Users demanding only the first lane can use the lane-0 definition
      // directly.
      RepR->replaceUsesWithIf(PerLane[0], [RepR](VPUser &U, unsigned) {
        return U.onlyFirstLaneUsed(RepR);
      });

      // The remaining users get a Build(Struct)Vector that assembles the
      // scalar lane values into a vector.
      Type *ResTy = RepR->getUnderlyingInstr()->getType();
      VPValue *Assembled = Builder.createNaryOp(
          ResTy->isStructTy() ? VPInstruction::BuildStructVector
                              : VPInstruction::BuildVector,
          PerLane);
      RepR->replaceAllUsesWith(Assembled);
      RepR->eraseFromParent();
    }
  }
}
550
551