- Notifications
You must be signed in to change notification settings - Fork 15.3k
[LAA] Keep track of sink ptr forked by a phi #67223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Given a function like the following: https://godbolt.org/z/nqW3637Mf ``` void s1161_ptr (int *Preds, double *a, double *b, double *c) { for (int i = 0; i < 1000; ++i) { if (Preds[i] != 0) b[i] = a[i] + 1; else a[i] = i * i; } } ``` LLVM will optimize the IR to a single store by a phi instruction: ``` for.body: ; preds = %entry, %for.inc %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] ... if.then: ; preds = %for.body %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv ... br label %for.inc if.else: ... br label %for.inc for.inc: ; preds = %if.then, %if.else %b.sink = phi ptr [ %b, %if.then ], [ %a, %if.else ] %add.sink = phi double [ %add, %if.then ], [ %conv, %if.else ] %arrayidx5 = getelementptr inbounds double, ptr %b.sink, i64 %indvars.iv store double %add.sink, ptr %arrayidx5, align 8 ``` Because array A has read and write operations at the same time in the loop body, we need extra runtime checks for this alias set compare to the previous scenario in commit be92112. To track the forked pointer, we transform it back to 2 PointerInfo entries with separate value just like the IR generated with option -simplifycfg-sink-common=false.
| @llvm/pr-subscribers-llvm-analysis ChangesGiven a function like the following: https://godbolt.org/z/nqW3637Mf LLVM will optimize the IR to a single store by a phi instruction: Because array A has read and write operations at the same time in the loop body, we need extra runtime checks for this alias set compare to the previous scenario in commit be92112. To track the forked pointer, we transform it back to 2 PointerInfo entries with separate value just like the IR generated with option -simplifycfg-sink-common=false. Full diff: https://github.com/llvm/llvm-project/pull/67223.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 245cce25a3d5743..6c775bf6bb70666 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -804,6 +805,7 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, } static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, + PredicatedScalarEvolution &PSE, function_ref<void(Value *)> AddPointer) { SmallPtrSet<Value *, 8> Visited; SmallVector<Value *> WorkList; @@ -821,8 +823,40 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, PN->getParent() != InnermostLoop.getHeader()) { for (const Use &Inc : PN->incoming_values()) WorkList.push_back(Inc); - } else + } else { + auto *GEP = dyn_cast<GetElementPtrInst>(Ptr); + ScalarEvolution &SE = *PSE.getSE(); + // Decompose the GEP when its offset is a induction variable. + if (GEP && GEP->getNumOperands() == 2 && + isa<SCEVAddExpr>(SE.getSCEV(GEP)) && + !InnermostLoop.isLoopInvariant(GEP->getOperand(0))) { + if (auto *PhiI = dyn_cast<PHINode>(GEP->getOperand(0))) { + const SCEV *Offset = SE.getSCEV(GEP->getOperand(1)); + if (PhiI->getNumOperands() == 2 && isa<SCEVAddRecExpr>(Offset) && + InnermostLoop.isLoopInvariant(PhiI->getIncomingValue(0)) && + InnermostLoop.isLoopInvariant(PhiI->getIncomingValue(1))) { + // Transform back to 2 PointerInfo entries with separate values to + // track the forked pointer. + Type *SourceTy = GEP->getResultElementType(); + bool IsInBounds = GEP->isInBounds(); + SmallVector<Value *, 4> Index(GEP->indices()); + IRBuilder<> BuilderA(PhiI->getIncomingBlock(0)->getTerminator()); + Value *In0 = PhiI->getIncomingValue(0); + auto *GEPA = BuilderA.CreateGEP( + SourceTy, In0, Index, GEP->getName() + ".in0", IsInBounds); + IRBuilder<> BuilderB(PhiI->getIncomingBlock(1)->getTerminator()); + Value *In1 = PhiI->getIncomingValue(1); + auto *GEPB = BuilderA.CreateGEP( + SourceTy, In1, Index, GEP->getName() + ".in1", IsInBounds); + AddPointer(GEPA); + AddPointer(GEPB); + continue; + } + } + } + AddPointer(Ptr); + } } } @@ -1634,7 +1668,7 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, } void MemoryDepChecker::addAccess(StoreInst *SI) { - visitPointers(SI->getPointerOperand(), *InnermostLoop, + visitPointers(SI->getPointerOperand(), *InnermostLoop, PSE, [this, SI](Value *Ptr) { Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx); InstMap.push_back(SI); @@ -1643,7 +1677,7 @@ void MemoryDepChecker::addAccess(StoreInst *SI) { } void MemoryDepChecker::addAccess(LoadInst *LI) { - visitPointers(LI->getPointerOperand(), *InnermostLoop, + visitPointers(LI->getPointerOperand(), *InnermostLoop, PSE, [this, LI](Value *Ptr) { Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx); InstMap.push_back(LI); @@ -2367,7 +2401,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, if (blockNeedsPredication(ST->getParent(), TheLoop, DT)) Loc.AATags.TBAA = nullptr; - visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, + visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, *PSE, [&Accesses, AccessTy, Loc](Value *Ptr) { MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr); Accesses.addStore(NewLoc, AccessTy); @@ -2416,7 +2450,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, if (blockNeedsPredication(LD->getParent(), TheLoop, DT)) Loc.AATags.TBAA = nullptr; - visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, + visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, *PSE, [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) { MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr); Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr); diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll index 30ffc4cb5e1e85f..3baac4d34e6078f 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll @@ -944,32 +944,37 @@ define void @forked_ptrs_with_different_base(ptr nocapture readonly %Preds, ptr ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Check 0: ; CHECK-NEXT: Comparing group ([[G1:.+]]): -; CHECK-NEXT: %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv +; CHECK-NEXT: %arrayidx7.in01 = getelementptr inbounds double, ptr %1, i64 %indvars.iv ; CHECK-NEXT: Against group ([[G2:.+]]): -; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv +; CHECK-NEXT: %arrayidx7.in12 = getelementptr inbounds double, ptr %2, i64 %indvars.iv ; CHECK-NEXT: Check 1: ; CHECK-NEXT: Comparing group ([[G1]]): -; CHECK-NEXT: %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv +; CHECK-NEXT: %arrayidx7.in01 = getelementptr inbounds double, ptr %1, i64 %indvars.iv +; CHECK-NEXT: Against group ([[G3:.+]]): +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv +; CHECK-NEXT: Check 2: +; CHECK-NEXT: Comparing group ([[G1]]): +; CHECK-NEXT: %arrayidx7.in01 = getelementptr inbounds double, ptr %1, i64 %indvars.iv ; CHECK-NEXT: Against group ([[G4:.+]]): ; CHECK-NEXT: %arrayidx5 = getelementptr inbounds double, ptr %0, i64 %indvars.iv -; CHECK-NEXT: Check 2: -; CHECK-NEXT: Comparing group ([[G3:.+]]): -; CHECK-NEXT: %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv -; CHECK-NEXT: Against group ([[G2]]): -; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; CHECK-NEXT: Check 3: -; CHECK-NEXT: Comparing group ([[G3]]): -; CHECK-NEXT: %arrayidx7 = getelementptr inbounds double, ptr %.sink, i64 %indvars.iv +; CHECK-NEXT: Comparing group ([[G2]]): +; CHECK-NEXT: %arrayidx7.in12 = getelementptr inbounds double, ptr %2, i64 %indvars.iv +; CHECK-NEXT: Against group ([[G3]]): +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv +; CHECK-NEXT: Check 4: +; CHECK-NEXT: Comparing group ([[G2]]): +; CHECK-NEXT: %arrayidx7.in12 = getelementptr inbounds double, ptr %2, i64 %indvars.iv ; CHECK-NEXT: Against group ([[G4]]): ; CHECK-NEXT: %arrayidx5 = getelementptr inbounds double, ptr %0, i64 %indvars.iv ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group [[G1]]: ; CHECK-NEXT: (Low: %1 High: (63992 + %1)) ; CHECK-NEXT: Member: {%1,+,8}<nw><%for.body> -; CHECK-NEXT: Group [[G3]]: +; CHECK-NEXT: Group [[G2]]: ; CHECK-NEXT: (Low: %2 High: (63992 + %2)) ; CHECK-NEXT: Member: {%2,+,8}<nw><%for.body> -; CHECK-NEXT: Group [[G2]]: +; CHECK-NEXT: Group [[G3]]: ; CHECK-NEXT: (Low: %Preds High: (31996 + %Preds)) ; CHECK-NEXT: Member: {%Preds,+,4}<nuw><%for.body> ; CHECK-NEXT: Group [[G4]]: @@ -1058,4 +1063,74 @@ for.inc: ; preds = %if.br1, %if.br0 for.cond.cleanup: ; preds = %for.inc ret void +} + +define void @forked_write_ptr_with_read_of_same_base(ptr nocapture readonly %Preds, ptr nocapture %a, ptr nocapture writeonly %b, ptr nocapture readnone %c) { +; CHECK: for.body: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[G1:.+]]): +; CHECK-NEXT: %arrayidx5.in01 = getelementptr inbounds double, ptr %b, i64 %indvars.iv +; CHECK-NEXT: Against group ([[G2:.+]]): +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv +; CHECK-NEXT: %arrayidx5.in12 = getelementptr inbounds double, ptr %a, i64 %indvars.iv +; CHECK-NEXT: Check 1: +; CHECK-NEXT: Comparing group ([[G1]]): +; CHECK-NEXT: %arrayidx5.in01 = getelementptr inbounds double, ptr %b, i64 %indvars.iv +; CHECK-NEXT: Against group ([[G3:.+]]): +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv +; CHECK-NEXT: Check 2: +; CHECK-NEXT: Comparing group ([[G2]]): +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv +; CHECK-NEXT: %arrayidx5.in12 = getelementptr inbounds double, ptr %a, i64 %indvars.iv +; CHECK-NEXT: Against group ([[G3]]): +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[G1]]: +; CHECK-NEXT: (Low: %b High: (8000 + %b)) +; CHECK-NEXT: Member: {%b,+,8}<nw><%for.body> +; CHECK-NEXT: Group [[G2]]: +; CHECK-NEXT: (Low: %a High: (8000 + %a)) +; CHECK-NEXT: Member: {%a,+,8}<nw><%for.body> +; CHECK-NEXT: Member: {%a,+,8}<nw><%for.body> +; CHECK-NEXT: Group [[G3]]: +; CHECK-NEXT: (Low: %Preds High: (4000 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<nuw><%for.body> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.inc + ret void + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1.not = icmp eq i32 %0, 0 + br i1 %cmp1.not, label %if.else, label %if.then + +if.then: ; preds = %for.body + %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %indvars.iv + %1 = load double, ptr %arrayidx3, align 8 + %add = fadd fast double %1, 1.000000e+00 + br label %for.inc + +if.else: ; preds = %for.body + %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv + %3 = trunc i64 %2 to i32 + %conv = sitofp i32 %3 to double + br label %for.inc + +for.inc: ; preds = %if.then, %if.else + %b.sink = phi ptr [ %b, %if.then ], [ %a, %if.else ] + %add.sink = phi double [ %add, %if.then ], [ %conv, %if.else ] + %arrayidx5 = getelementptr inbounds double, ptr %b.sink, i64 %indvars.iv + store double %add.sink, ptr %arrayidx5, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } \ No newline at end of file |
nikic left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is creating instructions during an analysis pass, which is obviously a no-go. As far as I can tell, there isn't even anything trying to clean them up afterwards?
| Thank you for clarifying this. |
Given a function like the following: https://godbolt.org/z/nqW3637Mf
LLVM will optimize the IR to a single store by a phi instruction:
Because array A has read and write operations at the same time in the loop body, we need extra runtime checks for this alias set compare to the previous scenario in commit be92112.
To track the forked pointer, we transform it back to 2 PointerInfo entries with separate value just like the IR generated with option -simplifycfg-sink-common=false.