Skip to content

Commit 39ccc09

Browse files
committed
[LV] Record GEP widening decisions in recipe (NFCI)
InnerLoopVectorizer's code called during VPlan execution still relies on the original IR's def-use relations to decide which vector code to generate, limiting the ability of VPlan transformations to modify def-use relations and still have ILV generate the vector code. This commit moves the GEP operand queries that control how GEPs are widened to a dedicated recipe, and extracts the GEP widening code into its own ILV method that takes those recorded decisions as arguments. This reduces ILV's use of ingredient def-use relations, as a step towards full VPlan-based def-use relations. Differential revision: https://reviews.llvm.org/D69067
1 parent b31a531 commit 39ccc09

File tree

6 files changed

+152
-81
lines changed

6 files changed

+152
-81
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 101 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,11 @@ class InnerLoopVectorizer {
428428
/// new unrolled loop, where UF is the unroll factor.
429429
using VectorParts = SmallVector<Value *, 2>;
430430

431+
/// Vectorize a single GetElementPtrInst based on information gathered and
432+
/// decisions taken during planning.
433+
void widenGEP(GetElementPtrInst *GEP, unsigned UF, unsigned VF,
434+
bool IsPtrLoopInvariant, SmallBitVector &IsIndexLoopInvariant);
435+
431436
/// Vectorize a single PHINode in a block. This method handles the induction
432437
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
433438
/// arbitrary length vectors.
@@ -3961,6 +3966,75 @@ void InnerLoopVectorizer::fixNonInductionPHIs() {
39613966
}
39623967
}
39633968

3969+
void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, unsigned UF,
3970+
unsigned VF, bool IsPtrLoopInvariant,
3971+
SmallBitVector &IsIndexLoopInvariant) {
3972+
// Construct a vector GEP by widening the operands of the scalar GEP as
3973+
// necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
3974+
// results in a vector of pointers when at least one operand of the GEP
3975+
// is vector-typed. Thus, to keep the representation compact, we only use
3976+
// vector-typed operands for loop-varying values.
3977+
3978+
if (VF > 1 && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
3979+
// If we are vectorizing, but the GEP has only loop-invariant operands,
3980+
// the GEP we build (by only using vector-typed operands for
3981+
// loop-varying values) would be a scalar pointer. Thus, to ensure we
3982+
// produce a vector of pointers, we need to either arbitrarily pick an
3983+
// operand to broadcast, or broadcast a clone of the original GEP.
3984+
// Here, we broadcast a clone of the original.
3985+
//
3986+
// TODO: If at some point we decide to scalarize instructions having
3987+
// loop-invariant operands, this special case will no longer be
3988+
// required. We would add the scalarization decision to
3989+
// collectLoopScalars() and teach getVectorValue() to broadcast
3990+
// the lane-zero scalar value.
3991+
auto *Clone = Builder.Insert(GEP->clone());
3992+
for (unsigned Part = 0; Part < UF; ++Part) {
3993+
Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
3994+
VectorLoopValueMap.setVectorValue(GEP, Part, EntryPart);
3995+
addMetadata(EntryPart, GEP);
3996+
}
3997+
} else {
3998+
// If the GEP has at least one loop-varying operand, we are sure to
3999+
// produce a vector of pointers. But if we are only unrolling, we want
4000+
// to produce a scalar GEP for each unroll part. Thus, the GEP we
4001+
// produce with the code below will be scalar (if VF == 1) or vector
4002+
// (otherwise). Note that for the unroll-only case, we still maintain
4003+
// values in the vector mapping with initVector, as we do for other
4004+
// instructions.
4005+
for (unsigned Part = 0; Part < UF; ++Part) {
4006+
// The pointer operand of the new GEP. If it's loop-invariant, we
4007+
// won't broadcast it.
4008+
auto *Ptr = IsPtrLoopInvariant
4009+
? GEP->getPointerOperand()
4010+
: getOrCreateVectorValue(GEP->getPointerOperand(), Part);
4011+
4012+
// Collect all the indices for the new GEP. If any index is
4013+
// loop-invariant, we won't broadcast it.
4014+
SmallVector<Value *, 4> Indices;
4015+
for (auto Index : enumerate(GEP->indices())) {
4016+
Value *User = Index.value().get();
4017+
if (IsIndexLoopInvariant[Index.index()])
4018+
Indices.push_back(User);
4019+
else
4020+
Indices.push_back(getOrCreateVectorValue(User, Part));
4021+
}
4022+
4023+
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
4024+
// but it should be a vector, otherwise.
4025+
auto *NewGEP =
4026+
GEP->isInBounds()
4027+
? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
4028+
Indices)
4029+
: Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
4030+
assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
4031+
"NewGEP is not a pointer vector");
4032+
VectorLoopValueMap.setVectorValue(GEP, Part, NewGEP);
4033+
addMetadata(NewGEP, GEP);
4034+
}
4035+
}
4036+
}
4037+
39644038
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
39654039
unsigned VF) {
39664040
PHINode *P = cast<PHINode>(PN);
@@ -4063,76 +4137,8 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
40634137
switch (I.getOpcode()) {
40644138
case Instruction::Br:
40654139
case Instruction::PHI:
4140+
case Instruction::GetElementPtr:
40664141
llvm_unreachable("This instruction is handled by a different recipe.");
4067-
case Instruction::GetElementPtr: {
4068-
// Construct a vector GEP by widening the operands of the scalar GEP as
4069-
// necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
4070-
// results in a vector of pointers when at least one operand of the GEP
4071-
// is vector-typed. Thus, to keep the representation compact, we only use
4072-
// vector-typed operands for loop-varying values.
4073-
auto *GEP = cast<GetElementPtrInst>(&I);
4074-
4075-
if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) {
4076-
// If we are vectorizing, but the GEP has only loop-invariant operands,
4077-
// the GEP we build (by only using vector-typed operands for
4078-
// loop-varying values) would be a scalar pointer. Thus, to ensure we
4079-
// produce a vector of pointers, we need to either arbitrarily pick an
4080-
// operand to broadcast, or broadcast a clone of the original GEP.
4081-
// Here, we broadcast a clone of the original.
4082-
//
4083-
// TODO: If at some point we decide to scalarize instructions having
4084-
// loop-invariant operands, this special case will no longer be
4085-
// required. We would add the scalarization decision to
4086-
// collectLoopScalars() and teach getVectorValue() to broadcast
4087-
// the lane-zero scalar value.
4088-
auto *Clone = Builder.Insert(GEP->clone());
4089-
for (unsigned Part = 0; Part < UF; ++Part) {
4090-
Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
4091-
VectorLoopValueMap.setVectorValue(&I, Part, EntryPart);
4092-
addMetadata(EntryPart, GEP);
4093-
}
4094-
} else {
4095-
// If the GEP has at least one loop-varying operand, we are sure to
4096-
// produce a vector of pointers. But if we are only unrolling, we want
4097-
// to produce a scalar GEP for each unroll part. Thus, the GEP we
4098-
// produce with the code below will be scalar (if VF == 1) or vector
4099-
// (otherwise). Note that for the unroll-only case, we still maintain
4100-
// values in the vector mapping with initVector, as we do for other
4101-
// instructions.
4102-
for (unsigned Part = 0; Part < UF; ++Part) {
4103-
// The pointer operand of the new GEP. If it's loop-invariant, we
4104-
// won't broadcast it.
4105-
auto *Ptr =
4106-
OrigLoop->isLoopInvariant(GEP->getPointerOperand())
4107-
? GEP->getPointerOperand()
4108-
: getOrCreateVectorValue(GEP->getPointerOperand(), Part);
4109-
4110-
// Collect all the indices for the new GEP. If any index is
4111-
// loop-invariant, we won't broadcast it.
4112-
SmallVector<Value *, 4> Indices;
4113-
for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) {
4114-
if (OrigLoop->isLoopInvariant(U.get()))
4115-
Indices.push_back(U.get());
4116-
else
4117-
Indices.push_back(getOrCreateVectorValue(U.get(), Part));
4118-
}
4119-
4120-
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
4121-
// but it should be a vector, otherwise.
4122-
auto *NewGEP =
4123-
GEP->isInBounds()
4124-
? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
4125-
Indices)
4126-
: Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
4127-
assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
4128-
"NewGEP is not a pointer vector");
4129-
VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
4130-
addMetadata(NewGEP, GEP);
4131-
}
4132-
}
4133-
4134-
break;
4135-
}
41364142
case Instruction::UDiv:
41374143
case Instruction::SDiv:
41384144
case Instruction::SRem:
@@ -6831,7 +6837,6 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
68316837
case Instruction::FPTrunc:
68326838
case Instruction::FRem:
68336839
case Instruction::FSub:
6834-
case Instruction::GetElementPtr:
68356840
case Instruction::ICmp:
68366841
case Instruction::IntToPtr:
68376842
case Instruction::Load:
@@ -6896,12 +6901,13 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
68966901

68976902
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
68986903
return false;
6899-
69006904
// If this ingredient's recipe is to be recorded, keep its recipe a singleton
69016905
// to avoid having to split recipes later.
69026906
bool IsSingleton = Ingredient2Recipe.count(I);
69036907

6904-
// Success: widen this instruction. We optimize the common case where
6908+
// Success: widen this instruction.
6909+
6910+
// Use the default widening recipe. We optimize the common case where
69056911
// consecutive instructions can be represented by a single recipe.
69066912
if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
69076913
LastExtensibleRecipe->appendInstruction(I))
@@ -6999,7 +7005,23 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
69997005
return true;
70007006
}
70017007

7002-
// Check if Instr is to be widened by a general VPWidenRecipe.
7008+
// Handle GEP widening.
7009+
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
7010+
auto Scalarize = [&](unsigned VF) {
7011+
return CM.isScalarWithPredication(Instr, VF) ||
7012+
CM.isScalarAfterVectorization(Instr, VF) ||
7013+
CM.isProfitableToScalarize(Instr, VF);
7014+
};
7015+
if (LoopVectorizationPlanner::getDecisionAndClampRange(Scalarize, Range))
7016+
return false;
7017+
VPWidenGEPRecipe *Recipe = new VPWidenGEPRecipe(GEP, OrigLoop);
7018+
setRecipe(Instr, Recipe);
7019+
VPBB->appendRecipe(Recipe);
7020+
return true;
7021+
}
7022+
7023+
// Check if Instr is to be widened by a general VPWidenRecipe, after
7024+
// having first checked for specific widening recipes.
70037025
if (tryToWiden(Instr, VPBB, Range))
70047026
return true;
70057027

@@ -7241,7 +7263,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
72417263

72427264
SmallPtrSet<Instruction *, 1> DeadInstructions;
72437265
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
7244-
Plan, Legal->getInductionVars(), DeadInstructions);
7266+
OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions);
72457267

72467268
return Plan;
72477269
}
@@ -7271,6 +7293,11 @@ void VPWidenRecipe::execute(VPTransformState &State) {
72717293
State.ILV->widenInstruction(Instr);
72727294
}
72737295

7296+
void VPWidenGEPRecipe::execute(VPTransformState &State) {
7297+
State.ILV->widenGEP(GEP, State.UF, State.VF, IsPtrLoopInvariant,
7298+
IsIndexLoopInvariant);
7299+
}
7300+
72747301
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
72757302
assert(!State.Instance && "Int or FP induction being replicated.");
72767303
State.ILV->widenIntOrFpInduction(IV, Trunc);

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,16 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O,
683683
O << " " << VPlanIngredient(IV) << "\\l\"";
684684
}
685685

686+
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent) const {
687+
O << " +\n" << Indent << "\"WIDEN-GEP ";
688+
O << (IsPtrLoopInvariant ? "Inv" : "Var");
689+
size_t IndicesNumber = IsIndexLoopInvariant.size();
690+
for (size_t I = 0; I < IndicesNumber; ++I)
691+
O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
692+
O << "\\l\"";
693+
O << " +\n" << Indent << "\" " << VPlanIngredient(GEP) << "\\l\"";
694+
}
695+
686696
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const {
687697
O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\"";
688698
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "llvm/ADT/DepthFirstIterator.h"
3232
#include "llvm/ADT/GraphTraits.h"
3333
#include "llvm/ADT/Optional.h"
34+
#include "llvm/ADT/SmallBitVector.h"
3435
#include "llvm/ADT/SmallPtrSet.h"
3536
#include "llvm/ADT/SmallSet.h"
3637
#include "llvm/ADT/SmallVector.h"
@@ -587,6 +588,7 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
587588
VPInterleaveSC,
588589
VPPredInstPHISC,
589590
VPReplicateSC,
591+
VPWidenGEPSC,
590592
VPWidenIntOrFpInductionSC,
591593
VPWidenMemoryInstructionSC,
592594
VPWidenPHISC,
@@ -749,6 +751,36 @@ class VPWidenRecipe : public VPRecipeBase {
749751
void print(raw_ostream &O, const Twine &Indent) const override;
750752
};
751753

754+
/// A recipe for handling GEP instructions.
755+
class VPWidenGEPRecipe : public VPRecipeBase {
756+
private:
757+
GetElementPtrInst *GEP;
758+
bool IsPtrLoopInvariant;
759+
SmallBitVector IsIndexLoopInvariant;
760+
761+
public:
762+
VPWidenGEPRecipe(GetElementPtrInst *GEP, Loop *OrigLoop)
763+
: VPRecipeBase(VPWidenGEPSC), GEP(GEP),
764+
IsIndexLoopInvariant(GEP->getNumIndices(), false) {
765+
IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
766+
for (auto Index : enumerate(GEP->indices()))
767+
IsIndexLoopInvariant[Index.index()] =
768+
OrigLoop->isLoopInvariant(Index.value().get());
769+
}
770+
~VPWidenGEPRecipe() override = default;
771+
772+
/// Method to support type inquiry through isa, cast, and dyn_cast.
773+
static inline bool classof(const VPRecipeBase *V) {
774+
return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC;
775+
}
776+
777+
/// Generate the gep nodes.
778+
void execute(VPTransformState &State) override;
779+
780+
/// Print the recipe.
781+
void print(raw_ostream &O, const Twine &Indent) const override;
782+
};
783+
752784
/// A recipe for handling phi nodes of integer and floating-point inductions,
753785
/// producing their vector and scalar values.
754786
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {

llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
using namespace llvm;
1818

1919
void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
20-
VPlanPtr &Plan,
20+
Loop *OrigLoop, VPlanPtr &Plan,
2121
LoopVectorizationLegality::InductionList *Inductions,
2222
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
2323

@@ -64,6 +64,8 @@ void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
6464
NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi);
6565
} else
6666
NewRecipe = new VPWidenPHIRecipe(Phi);
67+
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
68+
NewRecipe = new VPWidenGEPRecipe(GEP, OrigLoop);
6769
} else {
6870
// If the last recipe is a VPWidenRecipe, add Inst to it instead of
6971
// creating a new recipe.

llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class VPlanHCFGTransforms {
2525
/// Replaces the VPInstructions in \p Plan with corresponding
2626
/// widen recipes.
2727
static void VPInstructionsToVPRecipes(
28-
VPlanPtr &Plan,
28+
Loop *OrigLoop, VPlanPtr &Plan,
2929
LoopVectorizationLegality::InductionList *Inductions,
3030
SmallPtrSetImpl<Instruction *> &DeadInstructions);
3131
};

llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
8989

9090
LoopVectorizationLegality::InductionList Inductions;
9191
SmallPtrSet<Instruction *, 1> DeadInstructions;
92-
VPlanHCFGTransforms::VPInstructionsToVPRecipes(Plan, &Inductions,
93-
DeadInstructions);
92+
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
93+
LI->getLoopFor(LoopHeader), Plan, &Inductions, DeadInstructions);
9494
}
9595

9696
TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
@@ -119,8 +119,8 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
119119

120120
LoopVectorizationLegality::InductionList Inductions;
121121
SmallPtrSet<Instruction *, 1> DeadInstructions;
122-
VPlanHCFGTransforms::VPInstructionsToVPRecipes(Plan, &Inductions,
123-
DeadInstructions);
122+
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
123+
LI->getLoopFor(LoopHeader), Plan, &Inductions, DeadInstructions);
124124

125125
VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
126126
EXPECT_NE(nullptr, Entry->getSingleSuccessor());
@@ -136,7 +136,7 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
136136
auto *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
137137
EXPECT_NE(nullptr, Phi);
138138

139-
auto *Idx = dyn_cast<VPWidenRecipe>(&*Iter++);
139+
auto *Idx = dyn_cast<VPWidenGEPRecipe>(&*Iter++);
140140
EXPECT_NE(nullptr, Idx);
141141

142142
auto *Load = dyn_cast<VPWidenMemoryInstructionRecipe>(&*Iter++);

0 commit comments

Comments
 (0)