Skip to content

Commit 3ac2f16

Browse files
committed
[Attributor] Fix Load/Store Offsets if multiple bins are present for a pointer allocation.
1 parent f4c0c40 commit 3ac2f16

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1231
-835
lines changed

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6122,6 +6122,8 @@ struct AAPointerInfo : public AbstractAttribute {
61226122
virtual const_bin_iterator begin() const = 0;
61236123
virtual const_bin_iterator end() const = 0;
61246124
virtual int64_t numOffsetBins() const = 0;
6125+
virtual void dumpState(raw_ostream &O) const = 0;
6126+
virtual const Access &getBinAccess(unsigned Index) const = 0;
61256127

61266128
/// Call \p CB on all accesses that might interfere with \p Range and return
61276129
/// true if all such accesses were known and the callback returned true for
@@ -6293,6 +6295,9 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
62936295

62946296
virtual std::optional<TypeSize> getAllocatedSize() const = 0;
62956297

6298+
using NewOffsetsTy = DenseMap<AA::RangeTy, AA::RangeTy>;
6299+
virtual const NewOffsetsTy &getNewOffsets() const = 0;
6300+
62966301
/// See AbstractAttribute::getName()
62976302
const std::string getName() const override { return "AAAllocationInfo"; }
62986303

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 152 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,10 @@ struct AAPointerInfoImpl
10831083
return State::numOffsetBins();
10841084
}
10851085

1086+
virtual const Access &getBinAccess(unsigned Index) const override {
1087+
return getAccess(Index);
1088+
}
1089+
10861090
bool forallInterferingAccesses(
10871091
AA::RangeTy Range,
10881092
function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
@@ -1429,7 +1433,7 @@ struct AAPointerInfoImpl
14291433
void trackPointerInfoStatistics(const IRPosition &IRP) const {}
14301434

14311435
/// Dump the state into \p O.
1432-
void dumpState(raw_ostream &O) {
1436+
virtual void dumpState(raw_ostream &O) const override {
14331437
for (auto &It : OffsetBins) {
14341438
O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size
14351439
<< "] : " << It.getSecond().size() << "\n";
@@ -12686,6 +12690,11 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
1268612690
return AssumedAllocatedSize;
1268712691
}
1268812692

12693+
const NewOffsetsTy &getNewOffsets() const override {
12694+
assert(isValidState() && "the AA is invalid");
12695+
return NewComputedOffsets;
12696+
}
12697+
1268912698
std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
1269012699
const DataLayout &DL) {
1269112700

@@ -12735,37 +12744,42 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
1273512744
if (*AllocationSize == 0)
1273612745
return indicatePessimisticFixpoint();
1273712746

12738-
int64_t BinSize = PI->numOffsetBins();
12739-
12740-
// TODO: implement for multiple bins
12741-
if (BinSize > 1)
12742-
return indicatePessimisticFixpoint();
12747+
int64_t NumBins = PI->numOffsetBins();
1274312748

12744-
if (BinSize == 0) {
12749+
if (NumBins == 0) {
1274512750
auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false));
1274612751
if (!changeAllocationSize(NewAllocationSize))
1274712752
return ChangeStatus::UNCHANGED;
1274812753
return ChangeStatus::CHANGED;
1274912754
}
1275012755

12751-
// TODO: refactor this to be part of multiple bin case
12752-
const auto &It = PI->begin();
12756+
// For each access bin
12757+
// Compute its new start Offset and store the results in a new map
12758+
// (NewComputedOffsets).
12759+
int64_t PrevBinEndOffset = 0;
12760+
bool ChangedOffsets = false;
12761+
for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
12762+
It != PI->end(); It++) {
12763+
const AA::RangeTy &OldRange = It->getFirst();
12764+
int64_t NewStartOffset = PrevBinEndOffset;
12765+
int64_t NewEndOffset = NewStartOffset + OldRange.Size;
12766+
PrevBinEndOffset = NewEndOffset;
1275312767

12754-
// TODO: handle if Offset is not zero
12755-
if (It->first.Offset != 0)
12756-
return indicatePessimisticFixpoint();
12757-
12758-
uint64_t SizeOfBin = It->first.Offset + It->first.Size;
12759-
12760-
if (SizeOfBin >= *AllocationSize)
12761-
return indicatePessimisticFixpoint();
12768+
ChangedOffsets |= setNewOffsets(OldRange, OldRange.Offset, NewStartOffset,
12769+
OldRange.Size);
12770+
}
1276212771

12772+
// Set the new size of the allocation, the new size of the Allocation should
12773+
// be PrevBinEndOffset * 8, in bits.
1276312774
auto NewAllocationSize =
12764-
std::optional<TypeSize>(TypeSize(SizeOfBin * 8, false));
12775+
std::optional<TypeSize>(TypeSize(PrevBinEndOffset * 8, false));
1276512776

1276612777
if (!changeAllocationSize(NewAllocationSize))
1276712778
return ChangeStatus::UNCHANGED;
1276812779

12780+
if (!ChangedOffsets)
12781+
return ChangeStatus::UNCHANGED;
12782+
1276912783
return ChangeStatus::CHANGED;
1277012784
}
1277112785

@@ -12775,12 +12789,13 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
1277512789
assert(isValidState() &&
1277612790
"Manifest should only be called if the state is valid.");
1277712791

12778-
Instruction *I = getIRPosition().getCtxI();
12792+
bool Changed = false;
12793+
const IRPosition &IRP = getIRPosition();
12794+
Instruction *I = IRP.getCtxI();
1277912795

1278012796
auto FixedAllocatedSizeInBits = getAllocatedSize()->getFixedValue();
1278112797

1278212798
unsigned long NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
12783-
1278412799
switch (I->getOpcode()) {
1278512800
// TODO: add case for malloc like calls
1278612801
case Instruction::Alloca: {
@@ -12789,25 +12804,98 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
1278912804

1279012805
Type *CharType = Type::getInt8Ty(I->getContext());
1279112806

12792-
auto *NumBytesToValue =
12793-
ConstantInt::get(I->getContext(), APInt(32, NumBytesToAllocate));
12807+
Type *CharArrayType = ArrayType::get(CharType, NumBytesToAllocate);
1279412808

1279512809
BasicBlock::iterator insertPt = AI->getIterator();
1279612810
insertPt = std::next(insertPt);
12797-
AllocaInst *NewAllocaInst =
12798-
new AllocaInst(CharType, AI->getAddressSpace(), NumBytesToValue,
12799-
AI->getAlign(), AI->getName(), insertPt);
12800-
12801-
if (A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst))
12802-
return ChangeStatus::CHANGED;
12811+
AllocaInst *NewAllocaInst = new AllocaInst(
12812+
CharArrayType, AI->getAddressSpace(), AI->getName(), insertPt);
1280312813

12814+
Changed |= A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst);
1280412815
break;
1280512816
}
1280612817
default:
1280712818
break;
1280812819
}
1280912820

12810-
return ChangeStatus::UNCHANGED;
12821+
const AAPointerInfo *PI =
12822+
A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
12823+
12824+
if (!PI)
12825+
return ChangeStatus::UNCHANGED;
12826+
12827+
if (!PI->getState().isValidState())
12828+
return ChangeStatus::UNCHANGED;
12829+
12830+
const auto &NewOffsetsMap = getNewOffsets();
12831+
for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
12832+
It != PI->end(); It++) {
12833+
12834+
const auto &OldOffsetRange = It->getFirst();
12835+
12836+
// If the OldOffsetRange is not in the map, offsets for that bin did not
12837+
// change. We should just continue and skip changing the offsets in that
12838+
// case
12839+
if (!NewOffsetsMap.contains(OldOffsetRange))
12840+
continue;
12841+
12842+
const auto &NewOffsetRange = NewOffsetsMap.lookup(OldOffsetRange);
12843+
for (const auto AccIndex : It->getSecond()) {
12844+
12845+
const auto &AccessInstruction = PI->getBinAccess(AccIndex);
12846+
auto *LocalInst = AccessInstruction.getLocalInst();
12847+
12848+
switch (LocalInst->getOpcode()) {
12849+
case Instruction::Load: {
12850+
LoadInst *OldLoadInst = cast<LoadInst>(LocalInst);
12851+
Value *PointerOperand = OldLoadInst->getPointerOperand();
12852+
12853+
IntegerType *Int8TyInteger =
12854+
IntegerType::get(LocalInst->getContext(), 8);
12855+
IntegerType *Int64TyInteger =
12856+
IntegerType::get(LocalInst->getContext(), 64);
12857+
Value *indexList[2] = {
12858+
ConstantInt::get(Int64TyInteger, 0),
12859+
ConstantInt::get(Int64TyInteger,
12860+
NewOffsetRange.Offset - OldOffsetRange.Offset)};
12861+
Value *GepToNewAddress = GetElementPtrInst::Create(
12862+
Int8TyInteger, PointerOperand, indexList, "NewGep", OldLoadInst);
12863+
12864+
LoadInst *NewLoadInst =
12865+
new LoadInst(OldLoadInst->getType(), GepToNewAddress,
12866+
OldLoadInst->getName(), OldLoadInst);
12867+
Changed |= A.changeAfterManifest(IRPosition::inst(*OldLoadInst),
12868+
*NewLoadInst);
12869+
break;
12870+
}
12871+
case Instruction::Store: {
12872+
StoreInst *OldStoreInst = cast<StoreInst>(LocalInst);
12873+
Value *PointerOperand = OldStoreInst->getPointerOperand();
12874+
12875+
IntegerType *Int8TyInteger =
12876+
IntegerType::get(LocalInst->getContext(), 8);
12877+
IntegerType *Int64TyInteger =
12878+
IntegerType::get(LocalInst->getContext(), 64);
12879+
Value *indexList[2] = {
12880+
ConstantInt::get(Int64TyInteger, 0),
12881+
ConstantInt::get(Int64TyInteger,
12882+
NewOffsetRange.Offset - OldOffsetRange.Offset)};
12883+
Value *GepToNewAddress = GetElementPtrInst::Create(
12884+
Int8TyInteger, PointerOperand, indexList, "NewGep", OldStoreInst);
12885+
12886+
StoreInst *NewStoreInst = new StoreInst(
12887+
OldStoreInst->getValueOperand(), GepToNewAddress, OldStoreInst);
12888+
Changed |= A.changeAfterManifest(IRPosition::inst(*OldStoreInst),
12889+
*NewStoreInst);
12890+
break;
12891+
}
12892+
}
12893+
}
12894+
}
12895+
12896+
if (!Changed)
12897+
return ChangeStatus::UNCHANGED;
12898+
return ChangeStatus::CHANGED;
1281112899
}
1281212900

1281312901
/// See AbstractAttribute::getAsStr().
@@ -12821,8 +12909,28 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
1282112909
")";
1282212910
}
1282312911

12912+
void dumpNewOffsetBins(raw_ostream &O) {
12913+
12914+
O << "Printing Map from [OldOffsetsRange] : [NewOffsetsRange] if the "
12915+
"offsets changed."
12916+
<< "\n";
12917+
const auto &NewOffsetsMap = getNewOffsets();
12918+
for (auto It = NewOffsetsMap.begin(); It != NewOffsetsMap.end(); It++) {
12919+
12920+
const auto &OldRange = It->getFirst();
12921+
const auto &NewRange = It->getSecond();
12922+
12923+
O << "[" << OldRange.Offset << "," << OldRange.Offset + OldRange.Size
12924+
<< "] : ";
12925+
O << "[" << NewRange.Offset << "," << NewRange.Offset + NewRange.Size
12926+
<< "]";
12927+
O << "\n";
12928+
}
12929+
}
12930+
1282412931
private:
1282512932
std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
12933+
NewOffsetsTy NewComputedOffsets;
1282612934

1282712935
// Maintain the computed allocation size of the object.
1282812936
// Returns (bool) whether the size of the allocation was modified or not.
@@ -12834,6 +12942,21 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
1283412942
}
1283512943
return false;
1283612944
}
12945+
12946+
// Maps an old byte range to its new Offset range in the new allocation.
12947+
// Returns (bool) whether the old byte range's offsets changed or not.
12948+
bool setNewOffsets(const AA::RangeTy &OldRange, int64_t OldOffset,
12949+
int64_t NewComputedOffset, int64_t Size) {
12950+
12951+
if (OldOffset == NewComputedOffset)
12952+
return false;
12953+
12954+
AA::RangeTy &NewRange = NewComputedOffsets.getOrInsertDefault(OldRange);
12955+
NewRange.Offset = NewComputedOffset;
12956+
NewRange.Size = Size;
12957+
12958+
return true;
12959+
}
1283712960
};
1283812961

1283912962
struct AAAllocationInfoFloating : AAAllocationInfoImpl {

llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ define internal i32 @deref(ptr %x) nounwind {
99
; CGSCC-NEXT: entry:
1010
; CGSCC-NEXT: [[X_PRIV:%.*]] = alloca i32, align 4
1111
; CGSCC-NEXT: store i32 [[TMP0]], ptr [[X_PRIV]], align 4
12-
; CGSCC-NEXT: [[TRUETMP2:%.*]] = load i32, ptr [[X_PRIV]], align 4
13-
; CGSCC-NEXT: ret i32 [[TRUETMP2]]
12+
; CGSCC-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_PRIV]], align 4
13+
; CGSCC-NEXT: ret i32 [[TMP2]]
1414
;
1515
entry:
1616
%tmp2 = load i32, ptr %x, align 4
@@ -22,18 +22,18 @@ define i32 @f(i32 %x) {
2222
; TUNIT-LABEL: define {{[^@]+}}@f
2323
; TUNIT-SAME: (i32 returned [[X:%.*]]) #[[ATTR0:[0-9]+]] {
2424
; TUNIT-NEXT: entry:
25-
; TUNIT-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
26-
; TUNIT-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
25+
; TUNIT-NEXT: [[X_ADDR1:%.*]] = alloca i8, i32 4, align 4
26+
; TUNIT-NEXT: store i32 [[X]], ptr [[X_ADDR1]], align 4
2727
; TUNIT-NEXT: ret i32 [[X]]
2828
;
2929
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
3030
; CGSCC-LABEL: define {{[^@]+}}@f
3131
; CGSCC-SAME: (i32 [[X:%.*]]) #[[ATTR1:[0-9]+]] {
3232
; CGSCC-NEXT: entry:
33-
; CGSCC-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
34-
; CGSCC-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
35-
; CGSCC-NEXT: [[TRUETMP1:%.*]] = call i32 @deref(i32 [[X]]) #[[ATTR2:[0-9]+]]
36-
; CGSCC-NEXT: ret i32 [[TRUETMP1]]
33+
; CGSCC-NEXT: [[X_ADDR1:%.*]] = alloca i8, i32 4, align 4
34+
; CGSCC-NEXT: store i32 [[X]], ptr [[X_ADDR1]], align 4
35+
; CGSCC-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X]]) #[[ATTR2:[0-9]+]]
36+
; CGSCC-NEXT: ret i32 [[TMP1]]
3737
;
3838
entry:
3939
%x_addr = alloca i32

llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,24 +26,24 @@ define void @no_promote(ptr %arg) #1 {
2626
; TUNIT-LABEL: define {{[^@]+}}@no_promote
2727
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1:[0-9]+]] {
2828
; TUNIT-NEXT: bb:
29-
; TUNIT-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
30-
; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <4 x i64>, align 32
31-
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3:[0-9]+]]
32-
; TUNIT-NEXT: call fastcc void @no_promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TRUETMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) #[[ATTR4:[0-9]+]]
33-
; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <4 x i64>, ptr [[TRUETMP2]], align 32
34-
; TUNIT-NEXT: store <4 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
29+
; TUNIT-NEXT: [[TMP1:%.*]] = alloca i8, i32 32, align 32
30+
; TUNIT-NEXT: [[TMP22:%.*]] = alloca i8, i32 32, align 32
31+
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP1]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3:[0-9]+]]
32+
; TUNIT-NEXT: call fastcc void @no_promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP22]], ptr noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP1]]) #[[ATTR4:[0-9]+]]
33+
; TUNIT-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP22]], align 32
34+
; TUNIT-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2
3535
; TUNIT-NEXT: ret void
3636
;
3737
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) uwtable
3838
; CGSCC-LABEL: define {{[^@]+}}@no_promote
3939
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] {
4040
; CGSCC-NEXT: bb:
4141
; CGSCC-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
42-
; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <4 x i64>, align 32
42+
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
4343
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3:[0-9]+]]
44-
; CGSCC-NEXT: call fastcc void @no_promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TRUETMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) #[[ATTR4:[0-9]+]]
45-
; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <4 x i64>, ptr [[TRUETMP2]], align 32
46-
; CGSCC-NEXT: store <4 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
44+
; CGSCC-NEXT: call fastcc void @no_promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) #[[ATTR4:[0-9]+]]
45+
; CGSCC-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
46+
; CGSCC-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2
4747
; CGSCC-NEXT: ret void
4848
;
4949
bb:
@@ -78,26 +78,26 @@ define void @promote(ptr %arg) #0 {
7878
; TUNIT-LABEL: define {{[^@]+}}@promote
7979
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] {
8080
; TUNIT-NEXT: bb:
81-
; TUNIT-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
82-
; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <4 x i64>, align 32
83-
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]]
84-
; TUNIT-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[TMP]], align 32
85-
; TUNIT-NEXT: call fastcc void @promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TRUETMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]]
86-
; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <4 x i64>, ptr [[TRUETMP2]], align 32
87-
; TUNIT-NEXT: store <4 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
81+
; TUNIT-NEXT: [[TMP1:%.*]] = alloca i8, i32 32, align 32
82+
; TUNIT-NEXT: [[TMP22:%.*]] = alloca i8, i32 32, align 32
83+
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP1]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]]
84+
; TUNIT-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32
85+
; TUNIT-NEXT: call fastcc void @promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP22]], <4 x i64> [[TMP0]]) #[[ATTR4]]
86+
; TUNIT-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP22]], align 32
87+
; TUNIT-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2
8888
; TUNIT-NEXT: ret void
8989
;
9090
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
9191
; CGSCC-LABEL: define {{[^@]+}}@promote
9292
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR0]] {
9393
; CGSCC-NEXT: bb:
94-
; CGSCC-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
95-
; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <4 x i64>, align 32
96-
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]]
97-
; CGSCC-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[TMP]], align 32
98-
; CGSCC-NEXT: call fastcc void @promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TRUETMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]]
99-
; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <4 x i64>, ptr [[TRUETMP2]], align 32
100-
; CGSCC-NEXT: store <4 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
94+
; CGSCC-NEXT: [[TMP1:%.*]] = alloca i8, i32 32, align 32
95+
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
96+
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP1]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]]
97+
; CGSCC-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[TMP1]], align 32
98+
; CGSCC-NEXT: call fastcc void @promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]]
99+
; CGSCC-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
100+
; CGSCC-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2
101101
; CGSCC-NEXT: ret void
102102
;
103103
bb:

0 commit comments

Comments
 (0)