Skip to content

Commit 7f15254

Browse files
committed
[LV] Apply sink-after & interleave-groups as VPlan transformations (NFCI)
This recommits 11ed1c0 (reverted in 9f08ce0 because it triggered an assertion failure) with a fix: tryToWidenMemory() now first checks whether the widening decision is to interleave, thus maintaining the previous behavior where tryToInterleaveMemory() was called first, giving priority to interleave decisions over widening/scalarization. This commit also adds the test case that exposed this bug as a LIT test.
1 parent 0ac2963 commit 7f15254

File tree

9 files changed

+263
-134
lines changed

9 files changed

+263
-134
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -542,13 +542,10 @@ class InterleavedAccessInfo {
542542
/// formation for predicated accesses, we may be able to relax this limitation
543543
/// in the future once we handle more complicated blocks.
544544
void reset() {
545-
SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
546-
// Avoid releasing a pointer twice.
547-
for (auto &I : InterleaveGroupMap)
548-
DelSet.insert(I.second);
549-
for (auto *Ptr : DelSet)
550-
delete Ptr;
551545
InterleaveGroupMap.clear();
546+
for (auto *Ptr : InterleaveGroups)
547+
delete Ptr;
548+
InterleaveGroups.clear();
552549
RequiresScalarEpilogue = false;
553550
}
554551

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,9 @@ class LoopVectorizationPlanner {
201201
/// The profitability analysis.
202202
LoopVectorizationCostModel &CM;
203203

204+
/// The interleaved access analysis.
205+
InterleavedAccessInfo &IAI;
206+
204207
SmallVector<VPlanPtr, 4> VPlans;
205208

206209
/// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -223,8 +226,10 @@ class LoopVectorizationPlanner {
223226
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
224227
const TargetTransformInfo *TTI,
225228
LoopVectorizationLegality *Legal,
226-
LoopVectorizationCostModel &CM)
227-
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
229+
LoopVectorizationCostModel &CM,
230+
InterleavedAccessInfo &IAI)
231+
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
232+
IAI(IAI) {}
228233

229234
/// Plan how to best vectorize, return the best VF and its cost, or None if
230235
/// vectorization and interleaving should be avoided up front.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 98 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
67106710
return BlockMaskCache[BB] = BlockMask;
67116711
}
67126712

6713-
VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
6714-
VFRange &Range,
6715-
VPlanPtr &Plan) {
6716-
const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
6717-
if (!IG)
6718-
return nullptr;
6719-
6720-
// Now check if IG is relevant for VF's in the given range.
6721-
auto isIGMember = [&](Instruction *I) -> std::function<bool(unsigned)> {
6722-
return [=](unsigned VF) -> bool {
6723-
return (VF >= 2 && // Query is illegal for VF == 1
6724-
CM.getWideningDecision(I, VF) ==
6725-
LoopVectorizationCostModel::CM_Interleave);
6726-
};
6727-
};
6728-
if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range))
6729-
return nullptr;
6730-
6731-
// I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
6732-
// range. If it's the primary member of the IG construct a VPInterleaveRecipe.
6733-
// Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
6734-
assert(I == IG->getInsertPos() &&
6735-
"Generating a recipe for an adjunct member of an interleave group");
6736-
6737-
VPValue *Mask = nullptr;
6738-
if (Legal->isMaskRequired(I))
6739-
Mask = createBlockInMask(I->getParent(), Plan);
6740-
6741-
return new VPInterleaveRecipe(IG, Mask);
6742-
}
6743-
67446713
VPWidenMemoryInstructionRecipe *
67456714
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
67466715
VPlanPtr &Plan) {
@@ -6750,15 +6719,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
67506719
auto willWiden = [&](unsigned VF) -> bool {
67516720
if (VF == 1)
67526721
return false;
6753-
if (CM.isScalarAfterVectorization(I, VF) ||
6754-
CM.isProfitableToScalarize(I, VF))
6755-
return false;
67566722
LoopVectorizationCostModel::InstWidening Decision =
67576723
CM.getWideningDecision(I, VF);
67586724
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
67596725
"CM decision should be taken at this point.");
6760-
assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
6761-
"Interleave memory opportunity should be caught earlier.");
6726+
if (Decision == LoopVectorizationCostModel::CM_Interleave)
6727+
return true;
6728+
if (CM.isScalarAfterVectorization(I, VF) ||
6729+
CM.isProfitableToScalarize(I, VF))
6730+
return false;
67626731
return Decision != LoopVectorizationCostModel::CM_Scalarize;
67636732
};
67646733

@@ -6923,15 +6892,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
69236892
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
69246893
return false;
69256894

6895+
// If this ingredient's recipe is to be recorded, keep its recipe a singleton
6896+
// to avoid having to split recipes later.
6897+
bool IsSingleton = Ingredient2Recipe.count(I);
6898+
69266899
// Success: widen this instruction. We optimize the common case where
69276900
// consecutive instructions can be represented by a single recipe.
6928-
if (!VPBB->empty()) {
6929-
VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
6930-
if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
6931-
return true;
6932-
}
6901+
if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
6902+
LastExtensibleRecipe->appendInstruction(I))
6903+
return true;
69336904

6934-
VPBB->appendRecipe(new VPWidenRecipe(I));
6905+
VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I);
6906+
if (!IsSingleton)
6907+
LastExtensibleRecipe = WidenRecipe;
6908+
setRecipe(I, WidenRecipe);
6909+
VPBB->appendRecipe(WidenRecipe);
69356910
return true;
69366911
}
69376912

@@ -6947,6 +6922,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
69476922
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
69486923

69496924
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
6925+
setRecipe(I, Recipe);
69506926

69516927
// Find if I uses a predicated instruction. If so, it will use its scalar
69526928
// value. Avoid hoisting the insert-element which packs the scalar value into
@@ -7005,36 +6981,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
70056981
bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
70066982
VPlanPtr &Plan, VPBasicBlock *VPBB) {
70076983
VPRecipeBase *Recipe = nullptr;
7008-
// Check if Instr should belong to an interleave memory recipe, or already
7009-
// does. In the latter case Instr is irrelevant.
7010-
if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
7011-
VPBB->appendRecipe(Recipe);
7012-
return true;
7013-
}
7014-
7015-
// Check if Instr is a memory operation that should be widened.
7016-
if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) {
7017-
VPBB->appendRecipe(Recipe);
7018-
return true;
7019-
}
70206984

7021-
// Check if Instr should form some PHI recipe.
7022-
if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
7023-
VPBB->appendRecipe(Recipe);
7024-
return true;
7025-
}
7026-
if ((Recipe = tryToBlend(Instr, Plan))) {
6985+
// First, check for specific widening recipes that deal with memory
6986+
// operations, inductions and Phi nodes.
6987+
if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
6988+
(Recipe = tryToOptimizeInduction(Instr, Range)) ||
6989+
(Recipe = tryToBlend(Instr, Plan)) ||
6990+
(isa<PHINode>(Instr) &&
6991+
(Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) {
6992+
setRecipe(Instr, Recipe);
70276993
VPBB->appendRecipe(Recipe);
70286994
return true;
70296995
}
7030-
if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
7031-
VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
7032-
return true;
7033-
}
70346996

7035-
// Check if Instr is to be widened by a general VPWidenRecipe, after
7036-
// having first checked for specific widening recipes that deal with
7037-
// Interleave Groups, Inductions and Phi nodes.
6997+
// Check if Instr is to be widened by a general VPWidenRecipe.
70386998
if (tryToWiden(Instr, VPBB, Range))
70396999
return true;
70407000

@@ -7090,19 +7050,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
70907050
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
70917051
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
70927052
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
7053+
70937054
// Hold a mapping from predicated instructions to their recipes, in order to
70947055
// fix their AlsoPack behavior if a user is determined to replicate and use a
70957056
// scalar instead of vector value.
70967057
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
70977058

70987059
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
7099-
DenseMap<Instruction *, Instruction *> SinkAfterInverse;
7060+
7061+
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
7062+
7063+
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
7064+
7065+
// ---------------------------------------------------------------------------
7066+
// Pre-construction: record ingredients whose recipes we'll need to further
7067+
// process after constructing the initial VPlan.
7068+
// ---------------------------------------------------------------------------
7069+
7070+
// Mark instructions we'll need to sink later and their targets as
7071+
// ingredients whose recipe we'll need to record.
7072+
for (auto &Entry : SinkAfter) {
7073+
RecipeBuilder.recordRecipeOf(Entry.first);
7074+
RecipeBuilder.recordRecipeOf(Entry.second);
7075+
}
7076+
7077+
// For each interleave group which is relevant for this (possibly trimmed)
7078+
// Range, add it to the set of groups to be later applied to the VPlan and add
7079+
// placeholders for its members' Recipes which we'll be replacing with a
7080+
// single VPInterleaveRecipe.
7081+
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
7082+
auto applyIG = [IG, this](unsigned VF) -> bool {
7083+
return (VF >= 2 && // Query is illegal for VF == 1
7084+
CM.getWideningDecision(IG->getInsertPos(), VF) ==
7085+
LoopVectorizationCostModel::CM_Interleave);
7086+
};
7087+
if (!getDecisionAndClampRange(applyIG, Range))
7088+
continue;
7089+
InterleaveGroups.insert(IG);
7090+
for (unsigned i = 0; i < IG->getFactor(); i++)
7091+
if (Instruction *Member = IG->getMember(i))
7092+
RecipeBuilder.recordRecipeOf(Member);
7093+
};
7094+
7095+
// ---------------------------------------------------------------------------
7096+
// Build initial VPlan: Scan the body of the loop in a topological order to
7097+
// visit each basic block after having visited its predecessor basic blocks.
7098+
// ---------------------------------------------------------------------------
71007099

71017100
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
71027101
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
71037102
auto Plan = std::make_unique<VPlan>(VPBB);
71047103

7105-
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
71067104
// Represent values that will have defs inside VPlan.
71077105
for (Value *V : NeedDef)
71087106
Plan->addVPValue(V);
@@ -7123,8 +7121,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71237121

71247122
std::vector<Instruction *> Ingredients;
71257123

7126-
// Organize the ingredients to vectorize from current basic block in the
7127-
// right order.
7124+
// Introduce each ingredient into VPlan.
71287125
for (Instruction &I : BB->instructionsWithoutDebug()) {
71297126
Instruction *Instr = &I;
71307127

@@ -7134,43 +7131,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71347131
DeadInstructions.find(Instr) != DeadInstructions.end())
71357132
continue;
71367133

7137-
// I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
7138-
// member of the IG, do not construct any Recipe for it.
7139-
const InterleaveGroup<Instruction> *IG =
7140-
CM.getInterleavedAccessGroup(Instr);
7141-
if (IG && Instr != IG->getInsertPos() &&
7142-
Range.Start >= 2 && // Query is illegal for VF == 1
7143-
CM.getWideningDecision(Instr, Range.Start) ==
7144-
LoopVectorizationCostModel::CM_Interleave) {
7145-
auto SinkCandidate = SinkAfterInverse.find(Instr);
7146-
if (SinkCandidate != SinkAfterInverse.end())
7147-
Ingredients.push_back(SinkCandidate->second);
7148-
continue;
7149-
}
7150-
7151-
// Move instructions to handle first-order recurrences, step 1: avoid
7152-
// handling this instruction until after we've handled the instruction it
7153-
// should follow.
7154-
auto SAIt = SinkAfter.find(Instr);
7155-
if (SAIt != SinkAfter.end()) {
7156-
LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after"
7157-
<< *SAIt->second
7158-
<< " to vectorize a 1st order recurrence.\n");
7159-
SinkAfterInverse[SAIt->second] = Instr;
7160-
continue;
7161-
}
7162-
7163-
Ingredients.push_back(Instr);
7164-
7165-
// Move instructions to handle first-order recurrences, step 2: push the
7166-
// instruction to be sunk at its insertion point.
7167-
auto SAInvIt = SinkAfterInverse.find(Instr);
7168-
if (SAInvIt != SinkAfterInverse.end())
7169-
Ingredients.push_back(SAInvIt->second);
7170-
}
7171-
7172-
// Introduce each ingredient into VPlan.
7173-
for (Instruction *Instr : Ingredients) {
71747134
if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
71757135
continue;
71767136

@@ -7195,6 +7155,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71957155
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
71967156
delete PreEntry;
71977157

7158+
// ---------------------------------------------------------------------------
7159+
// Transform initial VPlan: Apply previously taken decisions, in order, to
7160+
// bring the VPlan to its final state.
7161+
// ---------------------------------------------------------------------------
7162+
7163+
// Apply Sink-After legal constraints.
7164+
for (auto &Entry : SinkAfter) {
7165+
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
7166+
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
7167+
Sink->moveAfter(Target);
7168+
}
7169+
7170+
// Interleave memory: for each Interleave Group we marked earlier as relevant
7171+
// for this VPlan, replace the Recipes widening its memory instructions with a
7172+
// single VPInterleaveRecipe at its insertion point.
7173+
for (auto IG : InterleaveGroups) {
7174+
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
7175+
RecipeBuilder.getRecipe(IG->getInsertPos()));
7176+
(new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);
7177+
7178+
for (unsigned i = 0; i < IG->getFactor(); ++i)
7179+
if (Instruction *Member = IG->getMember(i)) {
7180+
RecipeBuilder.getRecipe(Member)->eraseFromParent();
7181+
}
7182+
}
7183+
71987184
// Finally, if tail is folded by masking, introduce selects between the phi
71997185
// and the live-out instruction of each reduction, at the end of the latch.
72007186
if (CM.foldTailByMasking()) {
@@ -7427,12 +7413,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
74277413
}
74287414

74297415
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
7430-
if (!User)
7416+
VPValue *Mask = getMask();
7417+
if (!Mask)
74317418
return State.ILV->vectorizeMemoryInstruction(&Instr);
74327419

7433-
// Last (and currently only) operand is a mask.
74347420
InnerLoopVectorizer::VectorParts MaskValues(State.UF);
7435-
VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
74367421
for (unsigned Part = 0; Part < State.UF; ++Part)
74377422
MaskValues[Part] = State.get(Mask, Part);
74387423
State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
@@ -7481,7 +7466,7 @@ static bool processLoopInVPlanNativePath(
74817466
// Use the planner for outer loop vectorization.
74827467
// TODO: CM is not used at this point inside the planner. Turn CM into an
74837468
// optional argument if we don't need it in the future.
7484-
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
7469+
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
74857470

74867471
// Get user vectorization factor.
74877472
const unsigned UserVF = Hints.getWidth();
@@ -7641,7 +7626,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
76417626
CM.collectValuesToIgnore();
76427627

76437628
// Use the planner for vectorization.
7644-
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);
7629+
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
76457630

76467631
// Get user vectorization factor.
76477632
unsigned UserVF = Hints.getWidth();

0 commit comments

Comments
 (0)