@@ -428,6 +428,11 @@ class InnerLoopVectorizer {
   /// new unrolled loop, where UF is the unroll factor.
   using VectorParts = SmallVector<Value *, 2>;
 
+  /// Vectorize a single GetElementPtrInst based on information gathered and
+  /// decisions taken during planning.
+  void widenGEP(GetElementPtrInst *GEP, unsigned UF, unsigned VF,
+                bool IsPtrLoopInvariant, SmallBitVector &IsIndexLoopInvariant);
+
   /// Vectorize a single PHINode in a block. This method handles the induction
   /// variable canonicalization. It supports both VF = 1 for unrolled loops and
   /// arbitrary length vectors.
@@ -3961,6 +3966,75 @@ void InnerLoopVectorizer::fixNonInductionPHIs() {
   }
 }
 
+void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, unsigned UF,
+                                   unsigned VF, bool IsPtrLoopInvariant,
+                                   SmallBitVector &IsIndexLoopInvariant) {
+  // Construct a vector GEP by widening the operands of the scalar GEP as
+  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
+  // results in a vector of pointers when at least one operand of the GEP
+  // is vector-typed. Thus, to keep the representation compact, we only use
+  // vector-typed operands for loop-varying values.
+
+  if (VF > 1 && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
+    // If we are vectorizing, but the GEP has only loop-invariant operands,
+    // the GEP we build (by only using vector-typed operands for
+    // loop-varying values) would be a scalar pointer. Thus, to ensure we
+    // produce a vector of pointers, we need to either arbitrarily pick an
+    // operand to broadcast, or broadcast a clone of the original GEP.
+    // Here, we broadcast a clone of the original.
+    //
+    // TODO: If at some point we decide to scalarize instructions having
+    // loop-invariant operands, this special case will no longer be
+    // required. We would add the scalarization decision to
+    // collectLoopScalars() and teach getVectorValue() to broadcast
+    // the lane-zero scalar value.
+    auto *Clone = Builder.Insert(GEP->clone());
+    for (unsigned Part = 0; Part < UF; ++Part) {
+      Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
+      VectorLoopValueMap.setVectorValue(GEP, Part, EntryPart);
+      addMetadata(EntryPart, GEP);
+    }
+  } else {
+    // If the GEP has at least one loop-varying operand, we are sure to
+    // produce a vector of pointers. But if we are only unrolling, we want
+    // to produce a scalar GEP for each unroll part. Thus, the GEP we
+    // produce with the code below will be scalar (if VF == 1) or vector
+    // (otherwise). Note that for the unroll-only case, we still maintain
+    // values in the vector mapping with initVector, as we do for other
+    // instructions.
+    for (unsigned Part = 0; Part < UF; ++Part) {
+      // The pointer operand of the new GEP. If it's loop-invariant, we
+      // won't broadcast it.
+      auto *Ptr = IsPtrLoopInvariant
+                      ? GEP->getPointerOperand()
+                      : getOrCreateVectorValue(GEP->getPointerOperand(), Part);
+
+      // Collect all the indices for the new GEP. If any index is
+      // loop-invariant, we won't broadcast it.
+      SmallVector<Value *, 4> Indices;
+      for (auto Index : enumerate(GEP->indices())) {
+        Value *User = Index.value().get();
+        if (IsIndexLoopInvariant[Index.index()])
+          Indices.push_back(User);
+        else
+          Indices.push_back(getOrCreateVectorValue(User, Part));
+      }
+
+      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
+      // but it should be a vector, otherwise.
+      auto *NewGEP =
+          GEP->isInBounds()
+              ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
+                                          Indices)
+              : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
+      assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
+             "NewGEP is not a pointer vector");
+      VectorLoopValueMap.setVectorValue(GEP, Part, NewGEP);
+      addMetadata(NewGEP, GEP);
+    }
+  }
+}
+
 void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
                                               unsigned VF) {
   PHINode *P = cast<PHINode>(PN);
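The IsPtrLoopInvariant and IsIndexLoopInvariant arguments consumed above are supplied by the VPWidenGEPRecipe introduced further down in this patch. The recipe's declaration lives in VPlan.h and is not part of this diff; the following is only a minimal sketch of its plausible shape (member names are taken from the execute() hunk below, printing and classof support are omitted, and the exact constructor wording is assumed). The point it illustrates is that loop-invariance is computed once against OrigLoop at planning time, so widenGEP() no longer needs to query the Loop analysis during code generation.

```cpp
// Sketch only -- assumes the VPRecipeBase / VPTransformState definitions and
// a VPWidenGEPSC recipe-kind enumerator already present in VPlan.h, as the
// rest of this patch implies.
class VPWidenGEPRecipe : public VPRecipeBase {
  GetElementPtrInst *GEP;
  bool IsPtrLoopInvariant;
  SmallBitVector IsIndexLoopInvariant;

public:
  VPWidenGEPRecipe(GetElementPtrInst *GEP, Loop *OrigLoop)
      : VPRecipeBase(VPWidenGEPSC), GEP(GEP),
        IsIndexLoopInvariant(GEP->getNumIndices(), false) {
    // Decide once, at planning time, which operands are loop-invariant.
    IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
    for (auto Index : enumerate(GEP->indices()))
      IsIndexLoopInvariant[Index.index()] =
          OrigLoop->isLoopInvariant(Index.value().get());
  }

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;
};
```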
@@ -4063,76 +4137,8 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
   switch (I.getOpcode()) {
   case Instruction::Br:
   case Instruction::PHI:
+  case Instruction::GetElementPtr:
     llvm_unreachable("This instruction is handled by a different recipe.");
-  case Instruction::GetElementPtr: {
-    // Construct a vector GEP by widening the operands of the scalar GEP as
-    // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
-    // results in a vector of pointers when at least one operand of the GEP
-    // is vector-typed. Thus, to keep the representation compact, we only use
-    // vector-typed operands for loop-varying values.
-    auto *GEP = cast<GetElementPtrInst>(&I);
-
-    if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) {
-      // If we are vectorizing, but the GEP has only loop-invariant operands,
-      // the GEP we build (by only using vector-typed operands for
-      // loop-varying values) would be a scalar pointer. Thus, to ensure we
-      // produce a vector of pointers, we need to either arbitrarily pick an
-      // operand to broadcast, or broadcast a clone of the original GEP.
-      // Here, we broadcast a clone of the original.
-      //
-      // TODO: If at some point we decide to scalarize instructions having
-      // loop-invariant operands, this special case will no longer be
-      // required. We would add the scalarization decision to
-      // collectLoopScalars() and teach getVectorValue() to broadcast
-      // the lane-zero scalar value.
-      auto *Clone = Builder.Insert(GEP->clone());
-      for (unsigned Part = 0; Part < UF; ++Part) {
-        Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
-        VectorLoopValueMap.setVectorValue(&I, Part, EntryPart);
-        addMetadata(EntryPart, GEP);
-      }
-    } else {
-      // If the GEP has at least one loop-varying operand, we are sure to
-      // produce a vector of pointers. But if we are only unrolling, we want
-      // to produce a scalar GEP for each unroll part. Thus, the GEP we
-      // produce with the code below will be scalar (if VF == 1) or vector
-      // (otherwise). Note that for the unroll-only case, we still maintain
-      // values in the vector mapping with initVector, as we do for other
-      // instructions.
-      for (unsigned Part = 0; Part < UF; ++Part) {
-        // The pointer operand of the new GEP. If it's loop-invariant, we
-        // won't broadcast it.
-        auto *Ptr =
-            OrigLoop->isLoopInvariant(GEP->getPointerOperand())
-                ? GEP->getPointerOperand()
-                : getOrCreateVectorValue(GEP->getPointerOperand(), Part);
-
-        // Collect all the indices for the new GEP. If any index is
-        // loop-invariant, we won't broadcast it.
-        SmallVector<Value *, 4> Indices;
-        for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) {
-          if (OrigLoop->isLoopInvariant(U.get()))
-            Indices.push_back(U.get());
-          else
-            Indices.push_back(getOrCreateVectorValue(U.get(), Part));
-        }
-
-        // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
-        // but it should be a vector, otherwise.
-        auto *NewGEP =
-            GEP->isInBounds()
-                ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
-                                            Indices)
-                : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
-        assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
-               "NewGEP is not a pointer vector");
-        VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
-        addMetadata(NewGEP, GEP);
-      }
-    }
-
-    break;
-  }
   case Instruction::UDiv:
   case Instruction::SDiv:
   case Instruction::SRem:
@@ -6831,7 +6837,6 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
   case Instruction::FPTrunc:
   case Instruction::FRem:
   case Instruction::FSub:
-  case Instruction::GetElementPtr:
   case Instruction::ICmp:
   case Instruction::IntToPtr:
   case Instruction::Load:
@@ -6896,12 +6901,13 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
 
   if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
     return false;
-
   // If this ingredient's recipe is to be recorded, keep its recipe a singleton
   // to avoid having to split recipes later.
   bool IsSingleton = Ingredient2Recipe.count(I);
 
-  // Success: widen this instruction. We optimize the common case where
+  // Success: widen this instruction.
+
+  // Use the default widening recipe. We optimize the common case where
   // consecutive instructions can be represented by a single recipe.
   if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
       LastExtensibleRecipe->appendInstruction(I))
@@ -6999,7 +7005,23 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
     return true;
   }
 
-  // Check if Instr is to be widened by a general VPWidenRecipe.
+  // Handle GEP widening.
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
+    auto Scalarize = [&](unsigned VF) {
+      return CM.isScalarWithPredication(Instr, VF) ||
+             CM.isScalarAfterVectorization(Instr, VF) ||
+             CM.isProfitableToScalarize(Instr, VF);
+    };
+    if (LoopVectorizationPlanner::getDecisionAndClampRange(Scalarize, Range))
+      return false;
+    VPWidenGEPRecipe *Recipe = new VPWidenGEPRecipe(GEP, OrigLoop);
+    setRecipe(Instr, Recipe);
+    VPBB->appendRecipe(Recipe);
+    return true;
+  }
+
+  // Check if Instr is to be widened by a general VPWidenRecipe, after
+  // having first checked for specific widening recipes.
   if (tryToWiden(Instr, VPBB, Range))
     return true;
 
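The Scalarize lambda above feeds LoopVectorizationPlanner::getDecisionAndClampRange, which decides for a whole range of candidate VFs at once. As a self-contained illustration of that contract (a simplified re-statement under assumed semantics, not code from this patch): the helper returns the decision taken at the low end of the VF range and clamps the range so that every remaining VF yields the same decision, which is what lets tryToCreateRecipe commit to a single recipe, or to scalarization, for the entire clamped range.

```cpp
// Standalone sketch of the assumed range-clamping behaviour; VFRange is a
// stand-in for the planner's half-open range of power-of-two VF candidates.
#include <cassert>
#include <functional>

struct VFRange {
  unsigned Start; // inclusive
  unsigned End;   // exclusive
};

static bool getDecisionAndClampRange(
    const std::function<bool(unsigned)> &Predicate, VFRange &Range) {
  assert(Range.End > Range.Start && "trying to test an empty VF range");
  // The decision reported for the whole range is the decision at its start.
  bool DecisionAtStart = Predicate(Range.Start);
  // Clamp the range at the first VF whose decision differs.
  for (unsigned VF = Range.Start * 2; VF < Range.End; VF *= 2)
    if (Predicate(VF) != DecisionAtStart) {
      Range.End = VF;
      break;
    }
  return DecisionAtStart;
}

int main() {
  // Hypothetical cost model: scalarizing the GEP only pays off for VF >= 8.
  VFRange Range = {2, 16};
  bool Scalarize =
      getDecisionAndClampRange([](unsigned VF) { return VF >= 8; }, Range);
  // The "widen" decision holds for VF in {2, 4}; the range is clamped there.
  assert(!Scalarize && Range.End == 8);
  return 0;
}
```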
@@ -7241,7 +7263,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
 
   SmallPtrSet<Instruction *, 1> DeadInstructions;
   VPlanHCFGTransforms::VPInstructionsToVPRecipes(
-      Plan, Legal->getInductionVars(), DeadInstructions);
+      OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions);
 
   return Plan;
 }
@@ -7271,6 +7293,11 @@ void VPWidenRecipe::execute(VPTransformState &State) {
   State.ILV->widenInstruction(Instr);
 }
 
+void VPWidenGEPRecipe::execute(VPTransformState &State) {
+  State.ILV->widenGEP(GEP, State.UF, State.VF, IsPtrLoopInvariant,
+                      IsIndexLoopInvariant);
+}
+
 void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "Int or FP induction being replicated.");
   State.ILV->widenIntOrFpInduction(IV, Trunc);