@@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
67106710 return BlockMaskCache[BB] = BlockMask;
67116711}
67126712
6713- VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory (Instruction *I,
6714- VFRange &Range,
6715- VPlanPtr &Plan) {
6716- const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup (I);
6717- if (!IG)
6718- return nullptr ;
6719-
6720- // Now check if IG is relevant for VF's in the given range.
6721- auto isIGMember = [&](Instruction *I) -> std::function<bool (unsigned )> {
6722- return [=](unsigned VF) -> bool {
6723- return (VF >= 2 && // Query is illegal for VF == 1
6724- CM.getWideningDecision (I, VF) ==
6725- LoopVectorizationCostModel::CM_Interleave);
6726- };
6727- };
6728- if (!LoopVectorizationPlanner::getDecisionAndClampRange (isIGMember (I), Range))
6729- return nullptr ;
6730-
6731- // I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
6732- // range. If it's the primary member of the IG construct a VPInterleaveRecipe.
6733- // Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
6734- assert (I == IG->getInsertPos () &&
6735- " Generating a recipe for an adjunct member of an interleave group" );
6736-
6737- VPValue *Mask = nullptr ;
6738- if (Legal->isMaskRequired (I))
6739- Mask = createBlockInMask (I->getParent (), Plan);
6740-
6741- return new VPInterleaveRecipe (IG, Mask);
6742- }
6743-
67446713VPWidenMemoryInstructionRecipe *
67456714VPRecipeBuilder::tryToWidenMemory (Instruction *I, VFRange &Range,
67466715 VPlanPtr &Plan) {
@@ -6750,15 +6719,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
67506719 auto willWiden = [&](unsigned VF) -> bool {
67516720 if (VF == 1 )
67526721 return false ;
6753- if (CM.isScalarAfterVectorization (I, VF) ||
6754- CM.isProfitableToScalarize (I, VF))
6755- return false ;
67566722 LoopVectorizationCostModel::InstWidening Decision =
67576723 CM.getWideningDecision (I, VF);
67586724 assert (Decision != LoopVectorizationCostModel::CM_Unknown &&
67596725 " CM decision should be taken at this point." );
6760- assert (Decision != LoopVectorizationCostModel::CM_Interleave &&
6761- " Interleave memory opportunity should be caught earlier." );
6726+ if (Decision == LoopVectorizationCostModel::CM_Interleave)
6727+ return true ;
6728+ if (CM.isScalarAfterVectorization (I, VF) ||
6729+ CM.isProfitableToScalarize (I, VF))
6730+ return false ;
67626731 return Decision != LoopVectorizationCostModel::CM_Scalarize;
67636732 };
67646733
@@ -6923,15 +6892,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
69236892 if (!LoopVectorizationPlanner::getDecisionAndClampRange (willWiden, Range))
69246893 return false ;
69256894
6895+ // If this ingredient's recipe is to be recorded, keep its recipe a singleton
6896+ // to avoid having to split recipes later.
6897+ bool IsSingleton = Ingredient2Recipe.count (I);
6898+
69266899 // Success: widen this instruction. We optimize the common case where
69276900 // consecutive instructions can be represented by a single recipe.
6928- if (!VPBB->empty ()) {
6929- VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back ());
6930- if (LastWidenRecipe && LastWidenRecipe->appendInstruction (I))
6931- return true ;
6932- }
6901+ if (!IsSingleton && !VPBB->empty () && LastExtensibleRecipe == &VPBB->back () &&
6902+ LastExtensibleRecipe->appendInstruction (I))
6903+ return true ;
69336904
6934- VPBB->appendRecipe (new VPWidenRecipe (I));
6905+ VPWidenRecipe *WidenRecipe = new VPWidenRecipe (I);
6906+ if (!IsSingleton)
6907+ LastExtensibleRecipe = WidenRecipe;
6908+ setRecipe (I, WidenRecipe);
6909+ VPBB->appendRecipe (WidenRecipe);
69356910 return true ;
69366911}
69376912
@@ -6947,6 +6922,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
69476922 [&](unsigned VF) { return CM.isScalarWithPredication (I, VF); }, Range);
69486923
69496924 auto *Recipe = new VPReplicateRecipe (I, IsUniform, IsPredicated);
6925+ setRecipe (I, Recipe);
69506926
69516927 // Find if I uses a predicated instruction. If so, it will use its scalar
69526928 // value. Avoid hoisting the insert-element which packs the scalar value into
@@ -7005,36 +6981,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
70056981bool VPRecipeBuilder::tryToCreateRecipe (Instruction *Instr, VFRange &Range,
70066982 VPlanPtr &Plan, VPBasicBlock *VPBB) {
70076983 VPRecipeBase *Recipe = nullptr ;
7008- // Check if Instr should belong to an interleave memory recipe, or already
7009- // does. In the latter case Instr is irrelevant.
7010- if ((Recipe = tryToInterleaveMemory (Instr, Range, Plan))) {
7011- VPBB->appendRecipe (Recipe);
7012- return true ;
7013- }
7014-
7015- // Check if Instr is a memory operation that should be widened.
7016- if ((Recipe = tryToWidenMemory (Instr, Range, Plan))) {
7017- VPBB->appendRecipe (Recipe);
7018- return true ;
7019- }
70206984
7021- // Check if Instr should form some PHI recipe.
7022- if ((Recipe = tryToOptimizeInduction (Instr, Range))) {
7023- VPBB->appendRecipe (Recipe);
7024- return true ;
7025- }
7026- if ((Recipe = tryToBlend (Instr, Plan))) {
6985+ // First, check for specific widening recipes that deal with memory
6986+ // operations, inductions and Phi nodes.
6987+ if ((Recipe = tryToWidenMemory (Instr, Range, Plan)) ||
6988+ (Recipe = tryToOptimizeInduction (Instr, Range)) ||
6989+ (Recipe = tryToBlend (Instr, Plan)) ||
6990+ (isa<PHINode>(Instr) &&
6991+ (Recipe = new VPWidenPHIRecipe (cast<PHINode>(Instr))))) {
6992+ setRecipe (Instr, Recipe);
70276993 VPBB->appendRecipe (Recipe);
70286994 return true ;
70296995 }
7030- if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
7031- VPBB->appendRecipe (new VPWidenPHIRecipe (Phi));
7032- return true ;
7033- }
70346996
7035- // Check if Instr is to be widened by a general VPWidenRecipe, after
7036- // having first checked for specific widening recipes that deal with
7037- // Interleave Groups, Inductions and Phi nodes.
6997+ // Check if Instr is to be widened by a general VPWidenRecipe.
70386998 if (tryToWiden (Instr, VPBB, Range))
70396999 return true ;
70407000
@@ -7090,19 +7050,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
70907050VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes (
70917051 VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
70927052 SmallPtrSetImpl<Instruction *> &DeadInstructions) {
7053+
70937054 // Hold a mapping from predicated instructions to their recipes, in order to
70947055 // fix their AlsoPack behavior if a user is determined to replicate and use a
70957056 // scalar instead of vector value.
70967057 DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
70977058
70987059 DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter ();
7099- DenseMap<Instruction *, Instruction *> SinkAfterInverse;
7060+
7061+ SmallPtrSet<const InterleaveGroup<Instruction> *, 1 > InterleaveGroups;
7062+
7063+ VPRecipeBuilder RecipeBuilder (OrigLoop, TLI, Legal, CM, Builder);
7064+
7065+ // ---------------------------------------------------------------------------
7066+ // Pre-construction: record ingredients whose recipes we'll need to further
7067+ // process after constructing the initial VPlan.
7068+ // ---------------------------------------------------------------------------
7069+
7070+ // Mark instructions we'll need to sink later and their targets as
7071+ // ingredients whose recipe we'll need to record.
7072+ for (auto &Entry : SinkAfter) {
7073+ RecipeBuilder.recordRecipeOf (Entry.first );
7074+ RecipeBuilder.recordRecipeOf (Entry.second );
7075+ }
7076+
7077+ // For each interleave group that is relevant for this (possibly trimmed)
7078+ // Range, add it to the set of groups to be applied to the VPlan later, and
7079+ // record its members so that the Recipes created for them can be replaced
7080+ // with a single VPInterleaveRecipe.
7081+ for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups ()) {
7082+ auto applyIG = [IG, this ](unsigned VF) -> bool {
7083+ return (VF >= 2 && // Query is illegal for VF == 1
7084+ CM.getWideningDecision (IG->getInsertPos (), VF) ==
7085+ LoopVectorizationCostModel::CM_Interleave);
7086+ };
7087+ if (!getDecisionAndClampRange (applyIG, Range))
7088+ continue ;
7089+ InterleaveGroups.insert (IG);
7090+ for (unsigned i = 0 ; i < IG->getFactor (); i++)
7091+ if (Instruction *Member = IG->getMember (i))
7092+ RecipeBuilder.recordRecipeOf (Member);
7093+ };
7094+
7095+ // ---------------------------------------------------------------------------
7096+ // Build initial VPlan: Scan the body of the loop in a topological order to
7097+ // visit each basic block after having visited its predecessor basic blocks.
7098+ // ---------------------------------------------------------------------------
71007099
71017100 // Create a dummy pre-entry VPBasicBlock to start building the VPlan.
71027101 VPBasicBlock *VPBB = new VPBasicBlock (" Pre-Entry" );
71037102 auto Plan = std::make_unique<VPlan>(VPBB);
71047103
7105- VPRecipeBuilder RecipeBuilder (OrigLoop, TLI, Legal, CM, Builder);
71067104 // Represent values that will have defs inside VPlan.
71077105 for (Value *V : NeedDef)
71087106 Plan->addVPValue (V);
@@ -7123,8 +7121,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71237121
71247122 std::vector<Instruction *> Ingredients;
71257123
7126- // Organize the ingredients to vectorize from current basic block in the
7127- // right order.
7124+ // Introduce each ingredient into VPlan.
71287125 for (Instruction &I : BB->instructionsWithoutDebug ()) {
71297126 Instruction *Instr = &I;
71307127
@@ -7134,43 +7131,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71347131 DeadInstructions.find (Instr) != DeadInstructions.end ())
71357132 continue ;
71367133
7137- // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
7138- // member of the IG, do not construct any Recipe for it.
7139- const InterleaveGroup<Instruction> *IG =
7140- CM.getInterleavedAccessGroup (Instr);
7141- if (IG && Instr != IG->getInsertPos () &&
7142- Range.Start >= 2 && // Query is illegal for VF == 1
7143- CM.getWideningDecision (Instr, Range.Start ) ==
7144- LoopVectorizationCostModel::CM_Interleave) {
7145- auto SinkCandidate = SinkAfterInverse.find (Instr);
7146- if (SinkCandidate != SinkAfterInverse.end ())
7147- Ingredients.push_back (SinkCandidate->second );
7148- continue ;
7149- }
7150-
7151- // Move instructions to handle first-order recurrences, step 1: avoid
7152- // handling this instruction until after we've handled the instruction it
7153- // should follow.
7154- auto SAIt = SinkAfter.find (Instr);
7155- if (SAIt != SinkAfter.end ()) {
7156- LLVM_DEBUG (dbgs () << " Sinking" << *SAIt->first << " after"
7157- << *SAIt->second
7158- << " to vectorize a 1st order recurrence.\n " );
7159- SinkAfterInverse[SAIt->second ] = Instr;
7160- continue ;
7161- }
7162-
7163- Ingredients.push_back (Instr);
7164-
7165- // Move instructions to handle first-order recurrences, step 2: push the
7166- // instruction to be sunk at its insertion point.
7167- auto SAInvIt = SinkAfterInverse.find (Instr);
7168- if (SAInvIt != SinkAfterInverse.end ())
7169- Ingredients.push_back (SAInvIt->second );
7170- }
7171-
7172- // Introduce each ingredient into VPlan.
7173- for (Instruction *Instr : Ingredients) {
71747134 if (RecipeBuilder.tryToCreateRecipe (Instr, Range, Plan, VPBB))
71757135 continue ;
71767136
@@ -7195,6 +7155,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
71957155 VPBlockUtils::disconnectBlocks (PreEntry, Entry);
71967156 delete PreEntry;
71977157
7158+ // ---------------------------------------------------------------------------
7159+ // Transform initial VPlan: Apply previously taken decisions, in order, to
7160+ // bring the VPlan to its final state.
7161+ // ---------------------------------------------------------------------------
7162+
7163+ // Apply Sink-After legal constraints.
7164+ for (auto &Entry : SinkAfter) {
7165+ VPRecipeBase *Sink = RecipeBuilder.getRecipe (Entry.first );
7166+ VPRecipeBase *Target = RecipeBuilder.getRecipe (Entry.second );
7167+ Sink->moveAfter (Target);
7168+ }
7169+
7170+ // Interleave memory: for each Interleave Group we marked earlier as relevant
7171+ // for this VPlan, replace the Recipes widening its memory instructions with a
7172+ // single VPInterleaveRecipe at its insertion point.
7173+ for (auto IG : InterleaveGroups) {
7174+ auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
7175+ RecipeBuilder.getRecipe (IG->getInsertPos ()));
7176+ (new VPInterleaveRecipe (IG, Recipe->getMask ()))->insertBefore (Recipe);
7177+
7178+ for (unsigned i = 0 ; i < IG->getFactor (); ++i)
7179+ if (Instruction *Member = IG->getMember (i)) {
7180+ RecipeBuilder.getRecipe (Member)->eraseFromParent ();
7181+ }
7182+ }
7183+
71987184 // Finally, if tail is folded by masking, introduce selects between the phi
71997185 // and the live-out instruction of each reduction, at the end of the latch.
72007186 if (CM.foldTailByMasking ()) {
@@ -7427,12 +7413,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
74277413}
74287414
74297415void VPWidenMemoryInstructionRecipe::execute (VPTransformState &State) {
7430- if (!User)
7416+ VPValue *Mask = getMask ();
7417+ if (!Mask)
74317418 return State.ILV ->vectorizeMemoryInstruction (&Instr);
74327419
7433- // Last (and currently only) operand is a mask.
74347420 InnerLoopVectorizer::VectorParts MaskValues (State.UF );
7435- VPValue *Mask = User->getOperand (User->getNumOperands () - 1 );
74367421 for (unsigned Part = 0 ; Part < State.UF ; ++Part)
74377422 MaskValues[Part] = State.get (Mask, Part);
74387423 State.ILV ->vectorizeMemoryInstruction (&Instr, &MaskValues);
@@ -7481,7 +7466,7 @@ static bool processLoopInVPlanNativePath(
74817466 // Use the planner for outer loop vectorization.
74827467 // TODO: CM is not used at this point inside the planner. Turn CM into an
74837468 // optional argument if we don't need it in the future.
7484- LoopVectorizationPlanner LVP (L, LI, TLI, TTI, LVL, CM);
7469+ LoopVectorizationPlanner LVP (L, LI, TLI, TTI, LVL, CM, IAI );
74857470
74867471 // Get user vectorization factor.
74877472 const unsigned UserVF = Hints.getWidth ();
@@ -7641,7 +7626,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
76417626 CM.collectValuesToIgnore ();
76427627
76437628 // Use the planner for vectorization.
7644- LoopVectorizationPlanner LVP (L, LI, TLI, TTI, &LVL, CM);
7629+ LoopVectorizationPlanner LVP (L, LI, TLI, TTI, &LVL, CM, IAI );
76457630
76467631 // Get user vectorization factor.
76477632 unsigned UserVF = Hints.getWidth ();
0 commit comments