@@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
67106710 return BlockMaskCache[BB] = BlockMask;
67116711}
67126712
6713- VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory (Instruction *I,
6714- VFRange &Range,
6715- VPlanPtr &Plan) {
6716- const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup (I);
6717- if (!IG)
6718- return nullptr ;
6719-
6720- // Now check if IG is relevant for VF's in the given range.
6721- auto isIGMember = [&](Instruction *I) -> std::function<bool (unsigned )> {
6722- return [=](unsigned VF) -> bool {
6723- return (VF >= 2 && // Query is illegal for VF == 1
6724- CM.getWideningDecision (I, VF) ==
6725- LoopVectorizationCostModel::CM_Interleave);
6726- };
6727- };
6728- if (!LoopVectorizationPlanner::getDecisionAndClampRange (isIGMember (I), Range))
6729- return nullptr ;
6730-
6731- // I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
6732- // range. If it's the primary member of the IG construct a VPInterleaveRecipe.
6733- // Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
6734- assert (I == IG->getInsertPos () &&
6735- " Generating a recipe for an adjunct member of an interleave group" );
6736-
6737- VPValue *Mask = nullptr ;
6738- if (Legal->isMaskRequired (I))
6739- Mask = createBlockInMask (I->getParent (), Plan);
6740-
6741- return new VPInterleaveRecipe (IG, Mask);
6742- }
6743-
67446713VPWidenMemoryInstructionRecipe *
67456714VPRecipeBuilder::tryToWidenMemory (Instruction *I, VFRange &Range,
67466715 VPlanPtr &Plan) {
@@ -6757,8 +6726,6 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
67576726 CM.getWideningDecision (I, VF);
67586727 assert (Decision != LoopVectorizationCostModel::CM_Unknown &&
67596728 " CM decision should be taken at this point." );
6760- assert (Decision != LoopVectorizationCostModel::CM_Interleave &&
6761- " Interleave memory opportunity should be caught earlier." );
67626729 return Decision != LoopVectorizationCostModel::CM_Scalarize;
67636730 };
67646731
@@ -6923,15 +6890,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
69236890 if (!LoopVectorizationPlanner::getDecisionAndClampRange (willWiden, Range))
69246891 return false ;
69256892
6893+ // If this ingredient's recipe is to be recorded, keep its recipe a singleton
6894+ // to avoid having to split recipes later.
6895+ bool IsSingleton = Ingredient2Recipe.count (I);
6896+
69266897 // Success: widen this instruction. We optimize the common case where
69276898 // consecutive instructions can be represented by a single recipe.
6928- if (!VPBB->empty ()) {
6929- VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back ());
6930- if (LastWidenRecipe && LastWidenRecipe->appendInstruction (I))
6931- return true ;
6932- }
6899+ if (!IsSingleton && !VPBB->empty () && LastExtensibleRecipe == &VPBB->back () &&
6900+ LastExtensibleRecipe->appendInstruction (I))
6901+ return true ;
69336902
6934- VPBB->appendRecipe (new VPWidenRecipe (I));
6903+ VPWidenRecipe *WidenRecipe = new VPWidenRecipe (I);
6904+ if (!IsSingleton)
6905+ LastExtensibleRecipe = WidenRecipe;
6906+ setRecipe (I, WidenRecipe);
6907+ VPBB->appendRecipe (WidenRecipe);
69356908 return true ;
69366909}
69376910
@@ -6947,6 +6920,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
69476920 [&](unsigned VF) { return CM.isScalarWithPredication (I, VF); }, Range);
69486921
69496922 auto *Recipe = new VPReplicateRecipe (I, IsUniform, IsPredicated);
6923+ setRecipe (I, Recipe);
69506924
69516925 // Find if I uses a predicated instruction. If so, it will use its scalar
69526926 // value. Avoid hoisting the insert-element which packs the scalar value into
@@ -7005,36 +6979,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
70056979bool VPRecipeBuilder::tryToCreateRecipe (Instruction *Instr, VFRange &Range,
70066980 VPlanPtr &Plan, VPBasicBlock *VPBB) {
70076981 VPRecipeBase *Recipe = nullptr ;
7008- // Check if Instr should belong to an interleave memory recipe, or already
7009- // does. In the latter case Instr is irrelevant.
7010- if ((Recipe = tryToInterleaveMemory (Instr, Range, Plan))) {
7011- VPBB->appendRecipe (Recipe);
7012- return true ;
7013- }
7014-
7015- // Check if Instr is a memory operation that should be widened.
7016- if ((Recipe = tryToWidenMemory (Instr, Range, Plan))) {
7017- VPBB->appendRecipe (Recipe);
7018- return true ;
7019- }
70206982
7021- // Check if Instr should form some PHI recipe.
7022- if ((Recipe = tryToOptimizeInduction (Instr, Range))) {
7023- VPBB->appendRecipe (Recipe);
7024- return true ;
7025- }
7026- if ((Recipe = tryToBlend (Instr, Plan))) {
6983+ // First, check for specific widening recipes that deal with memory
6984+ // operations, inductions and Phi nodes.
6985+ if ((Recipe = tryToWidenMemory (Instr, Range, Plan)) ||
6986+ (Recipe = tryToOptimizeInduction (Instr, Range)) ||
6987+ (Recipe = tryToBlend (Instr, Plan)) ||
6988+ (isa<PHINode>(Instr) &&
6989+ (Recipe = new VPWidenPHIRecipe (cast<PHINode>(Instr))))) {
6990+ setRecipe (Instr, Recipe);
70276991 VPBB->appendRecipe (Recipe);
70286992 return true ;
70296993 }
7030- if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
7031- VPBB->appendRecipe (new VPWidenPHIRecipe (Phi));
7032- return true ;
7033- }
70346994
7035- // Check if Instr is to be widened by a general VPWidenRecipe, after
7036- // having first checked for specific widening recipes that deal with
7037- // Interleave Groups, Inductions and Phi nodes.
6995+ // Check if Instr is to be widened by a general VPWidenRecipe.
70386996 if (tryToWiden (Instr, VPBB, Range))
70396997 return true ;
70406998
@@ -7090,19 +7048,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
70907048VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes (
70917049 VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
70927050 SmallPtrSetImpl<Instruction *> &DeadInstructions) {
7051+
70937052 // Hold a mapping from predicated instructions to their recipes, in order to
70947053 // fix their AlsoPack behavior if a user is determined to replicate and use a
70957054 // scalar instead of vector value.
70967055 DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
70977056
70987057 DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter ();
7099- DenseMap<Instruction *, Instruction *> SinkAfterInverse;
7058+
7059+ SmallPtrSet<const InterleaveGroup<Instruction> *, 1 > InterleaveGroups;
7060+
7061+ VPRecipeBuilder RecipeBuilder (OrigLoop, TLI, Legal, CM, Builder);
7062+
7063+ // ---------------------------------------------------------------------------
7064+ // Pre-construction: record ingredients whose recipes we'll need to further
7065+ // process after constructing the initial VPlan.
7066+ // ---------------------------------------------------------------------------
7067+
7068+ // Mark instructions we'll need to sink later and their targets as
7069+ // ingredients whose recipe we'll need to record.
7070+ for (auto &Entry : SinkAfter) {
7071+ RecipeBuilder.recordRecipeOf (Entry.first );
7072+ RecipeBuilder.recordRecipeOf (Entry.second );
7073+ }
7074+
7075+ // For each interleave group which is relevant for this (possibly trimmed)
7076+ // Range, add it to the set of groups to be later applied to the VPlan and add
7077+ // placeholders for its members' Recipes which we'll be replacing with a
7078+ // single VPInterleaveRecipe.
7079+ for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups ()) {
7080+ auto applyIG = [IG, this ](unsigned VF) -> bool {
7081+ return (VF >= 2 && // Query is illegal for VF == 1
7082+ CM.getWideningDecision (IG->getInsertPos (), VF) ==
7083+ LoopVectorizationCostModel::CM_Interleave);
7084+ };
7085+ if (!getDecisionAndClampRange (applyIG, Range))
7086+ continue ;
7087+ InterleaveGroups.insert (IG);
7088+ for (unsigned i = 0 ; i < IG->getFactor (); i++)
7089+ if (Instruction *Member = IG->getMember (i))
7090+ RecipeBuilder.recordRecipeOf (Member);
7091+ };
7092+
7093+ // ---------------------------------------------------------------------------
7094+ // Build initial VPlan: Scan the body of the loop in a topological order to
7095+ // visit each basic block after having visited its predecessor basic blocks.
7096+ // ---------------------------------------------------------------------------
71007097
71017098 // Create a dummy pre-entry VPBasicBlock to start building the VPlan.
71027099 VPBasicBlock *VPBB = new VPBasicBlock (" Pre-Entry" );
71037100 auto Plan = std::make_unique<VPlan>(VPBB);
71047101
7105- VPRecipeBuilder RecipeBuilder (OrigLoop, TLI, Legal, CM, Builder);
71067102 // Represent values that will have defs inside VPlan.
71077103 for (Value *V : NeedDef)
71087104 Plan->addVPValue (V);
@@ -7123,8 +7119,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71237119
71247120 std::vector<Instruction *> Ingredients;
71257121
7126- // Organize the ingredients to vectorize from current basic block in the
7127- // right order.
7122+ // Introduce each ingredient into VPlan.
71287123 for (Instruction &I : BB->instructionsWithoutDebug ()) {
71297124 Instruction *Instr = &I;
71307125
@@ -7134,43 +7129,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71347129 DeadInstructions.find (Instr) != DeadInstructions.end ())
71357130 continue ;
71367131
7137- // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
7138- // member of the IG, do not construct any Recipe for it.
7139- const InterleaveGroup<Instruction> *IG =
7140- CM.getInterleavedAccessGroup (Instr);
7141- if (IG && Instr != IG->getInsertPos () &&
7142- Range.Start >= 2 && // Query is illegal for VF == 1
7143- CM.getWideningDecision (Instr, Range.Start ) ==
7144- LoopVectorizationCostModel::CM_Interleave) {
7145- auto SinkCandidate = SinkAfterInverse.find (Instr);
7146- if (SinkCandidate != SinkAfterInverse.end ())
7147- Ingredients.push_back (SinkCandidate->second );
7148- continue ;
7149- }
7150-
7151- // Move instructions to handle first-order recurrences, step 1: avoid
7152- // handling this instruction until after we've handled the instruction it
7153- // should follow.
7154- auto SAIt = SinkAfter.find (Instr);
7155- if (SAIt != SinkAfter.end ()) {
7156- LLVM_DEBUG (dbgs () << " Sinking" << *SAIt->first << " after"
7157- << *SAIt->second
7158- << " to vectorize a 1st order recurrence.\n " );
7159- SinkAfterInverse[SAIt->second ] = Instr;
7160- continue ;
7161- }
7162-
7163- Ingredients.push_back (Instr);
7164-
7165- // Move instructions to handle first-order recurrences, step 2: push the
7166- // instruction to be sunk at its insertion point.
7167- auto SAInvIt = SinkAfterInverse.find (Instr);
7168- if (SAInvIt != SinkAfterInverse.end ())
7169- Ingredients.push_back (SAInvIt->second );
7170- }
7171-
7172- // Introduce each ingredient into VPlan.
7173- for (Instruction *Instr : Ingredients) {
71747132 if (RecipeBuilder.tryToCreateRecipe (Instr, Range, Plan, VPBB))
71757133 continue ;
71767134
@@ -7195,6 +7153,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71957153 VPBlockUtils::disconnectBlocks (PreEntry, Entry);
71967154 delete PreEntry;
71977155
7156+ // ---------------------------------------------------------------------------
7157+ // Transform initial VPlan: Apply previously taken decisions, in order, to
7158+ // bring the VPlan to its final state.
7159+ // ---------------------------------------------------------------------------
7160+
7161+ // Apply Sink-After legal constraints.
7162+ for (auto &Entry : SinkAfter) {
7163+ VPRecipeBase *Sink = RecipeBuilder.getRecipe (Entry.first );
7164+ VPRecipeBase *Target = RecipeBuilder.getRecipe (Entry.second );
7165+ Sink->moveAfter (Target);
7166+ }
7167+
7168+ // Interleave memory: for each Interleave Group we marked earlier as relevant
7169+ // for this VPlan, replace the Recipes widening its memory instructions with a
7170+ // single VPInterleaveRecipe at its insertion point.
7171+ for (auto IG : InterleaveGroups) {
7172+ auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
7173+ RecipeBuilder.getRecipe (IG->getInsertPos ()));
7174+ (new VPInterleaveRecipe (IG, Recipe->getMask ()))->insertBefore (Recipe);
7175+
7176+ for (unsigned i = 0 ; i < IG->getFactor (); ++i)
7177+ if (Instruction *Member = IG->getMember (i)) {
7178+ RecipeBuilder.getRecipe (Member)->eraseFromParent ();
7179+ }
7180+ }
7181+
71987182 // Finally, if tail is folded by masking, introduce selects between the phi
71997183 // and the live-out instruction of each reduction, at the end of the latch.
72007184 if (CM.foldTailByMasking ()) {
@@ -7427,12 +7411,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
74277411}
74287412
74297413void VPWidenMemoryInstructionRecipe::execute (VPTransformState &State) {
7430- if (!User)
7414+ VPValue *Mask = getMask ();
7415+ if (!Mask)
74317416 return State.ILV ->vectorizeMemoryInstruction (&Instr);
74327417
7433- // Last (and currently only) operand is a mask.
74347418 InnerLoopVectorizer::VectorParts MaskValues (State.UF );
7435- VPValue *Mask = User->getOperand (User->getNumOperands () - 1 );
74367419 for (unsigned Part = 0 ; Part < State.UF ; ++Part)
74377420 MaskValues[Part] = State.get (Mask, Part);
74387421 State.ILV ->vectorizeMemoryInstruction (&Instr, &MaskValues);
@@ -7473,7 +7456,7 @@ static bool processLoopInVPlanNativePath(
74737456 // Use the planner for outer loop vectorization.
74747457 // TODO: CM is not used at this point inside the planner. Turn CM into an
74757458 // optional argument if we don't need it in the future.
7476- LoopVectorizationPlanner LVP (L, LI, TLI, TTI, LVL, CM);
7459+ LoopVectorizationPlanner LVP (L, LI, TLI, TTI, LVL, CM, IAI );
74777460
74787461 // Get user vectorization factor.
74797462 const unsigned UserVF = Hints.getWidth ();
@@ -7631,7 +7614,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
76317614 CM.collectValuesToIgnore ();
76327615
76337616 // Use the planner for vectorization.
7634- LoopVectorizationPlanner LVP (L, LI, TLI, TTI, &LVL, CM);
7617+ LoopVectorizationPlanner LVP (L, LI, TLI, TTI, &LVL, CM, IAI );
76357618
76367619 // Get user vectorization factor.
76377620 unsigned UserVF = Hints.getWidth ();
0 commit comments