@@ -8,9 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
88; This should promote
99define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 (ptr %arg , ptr readonly %arg1 ) #0 {
1010; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
11- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
11+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR0:[0-9]+]] {
1212; CHECK-NEXT: bb:
13- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
13+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
1414; CHECK-NEXT: ret void
1515;
1616bb:
2121
2222define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512 (ptr %arg ) #0 {
2323; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
24- ; CHECK-SAME: (ptr [[ARG:%.*]])
24+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
2525; CHECK-NEXT: bb:
2626; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
2727; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
2828; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
29- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
29+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
3030; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
3131; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
3232; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
4545; This should promote
4646define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 (ptr %arg , ptr readonly %arg1 ) #1 {
4747; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
48- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
48+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR1:[0-9]+]] {
4949; CHECK-NEXT: bb:
50- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
50+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
5151; CHECK-NEXT: ret void
5252;
5353bb:
5858
5959define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256 (ptr %arg ) #1 {
6060; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
61- ; CHECK-SAME: (ptr [[ARG:%.*]])
61+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
6262; CHECK-NEXT: bb:
6363; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
6464; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
6565; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
66- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
66+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
6767; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
6868; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
6969; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
8282; This should promote
8383define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 (ptr %arg , ptr readonly %arg1 ) #1 {
8484; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
85- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
85+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR1]] {
8686; CHECK-NEXT: bb:
87- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
87+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
8888; CHECK-NEXT: ret void
8989;
9090bb:
9595
9696define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256 (ptr %arg ) #0 {
9797; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
98- ; CHECK-SAME: (ptr [[ARG:%.*]])
98+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
9999; CHECK-NEXT: bb:
100100; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
101101; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
102102; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
103- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
103+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
104104; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
105105; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
106106; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
119119; This should promote
120120define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 (ptr %arg , ptr readonly %arg1 ) #0 {
121121; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
122- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
122+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR0]] {
123123; CHECK-NEXT: bb:
124- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
124+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
125125; CHECK-NEXT: ret void
126126;
127127bb:
@@ -132,12 +132,12 @@ bb:
132132
133133define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512 (ptr %arg ) #1 {
134134; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
135- ; CHECK-SAME: (ptr [[ARG:%.*]])
135+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
136136; CHECK-NEXT: bb:
137137; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
138138; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
139139; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
140- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
140+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
141141; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
142142; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
143143; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -156,10 +156,10 @@ bb:
156156; This should not promote
157157define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 (ptr %arg , ptr readonly %arg1 ) #1 {
158158; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
159- ; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
159+ ; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR1]] {
160160; CHECK-NEXT: bb:
161- ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
162- ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
161+ ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
162+ ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
163163; CHECK-NEXT: ret void
164164;
165165bb:
170170
171171define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256 (ptr %arg ) #2 {
172172; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
173- ; CHECK-SAME: (ptr [[ARG:%.*]])
173+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
174174; CHECK-NEXT: bb:
175175; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
176176; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -193,10 +193,10 @@ bb:
193193; This should not promote
194194define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 (ptr %arg , ptr readonly %arg1 ) #2 {
195195; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
196- ; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
196+ ; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR2]] {
197197; CHECK-NEXT: bb:
198- ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
199- ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
198+ ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
199+ ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
200200; CHECK-NEXT: ret void
201201;
202202bb:
207207
208208define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256 (ptr %arg ) #1 {
209209; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
210- ; CHECK-SAME: (ptr [[ARG:%.*]])
210+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
211211; CHECK-NEXT: bb:
212212; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
213213; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
230230; This should promote
231231define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 (ptr %arg , ptr readonly %arg1 ) #3 {
232232; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
233- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
233+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR3:[0-9]+]] {
234234; CHECK-NEXT: bb:
235- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
235+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
236236; CHECK-NEXT: ret void
237237;
238238bb:
@@ -243,12 +243,12 @@ bb:
243243
244244define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256 (ptr %arg ) #4 {
245245; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
246- ; CHECK-SAME: (ptr [[ARG:%.*]])
246+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
247247; CHECK-NEXT: bb:
248248; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
249249; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
250250; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
251- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
251+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
252252; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
253253; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
254254; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
267267; This should promote
268268define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 (ptr %arg , ptr readonly %arg1 ) #4 {
269269; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
270- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
270+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR3]] {
271271; CHECK-NEXT: bb:
272- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
272+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
273273; CHECK-NEXT: ret void
274274;
275275bb:
@@ -280,12 +280,12 @@ bb:
280280
281281define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256 (ptr %arg ) #3 {
282282; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
283- ; CHECK-SAME: (ptr [[ARG:%.*]])
283+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
284284; CHECK-NEXT: bb:
285285; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
286286; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
287287; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
288- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
288+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
289289; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
290290; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
291291; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
304304; If the arguments are scalar, it's ok to promote.
305305define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 (ptr %X , ptr %Y ) #2 {
306306; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
307- ; CHECK-SAME: (i32 [[X_VAL :%.*]], i32 [[Y_VAL :%.*]])
308- ; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL ]], [[Y_VAL ]]
307+ ; CHECK-SAME: (i32 [[X_0_VAL :%.*]], i32 [[Y_0_VAL :%.*]]) #[[ATTR2]] {
308+ ; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL ]], [[Y_0_VAL ]]
309309; CHECK-NEXT: ret i32 [[C]]
310310;
311311 %A = load i32 , ptr %X
@@ -316,11 +316,11 @@ define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal51
316316
317317define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256 (ptr %B ) #2 {
318318; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
319- ; CHECK-SAME: (ptr [[B:%.*]])
320- ; CHECK-NEXT: [[A:%.*]] = alloca i32
321- ; CHECK-NEXT: store i32 1, ptr [[A]]
322- ; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
323- ; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
319+ ; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
320+ ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
321+ ; CHECK-NEXT: store i32 1, ptr [[A]], align 4
322+ ; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
323+ ; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
324324; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
325325; CHECK-NEXT: ret i32 [[C]]
326326;
@@ -333,8 +333,8 @@ define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr
333333; If the arguments are scalar, it's ok to promote.
334334define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 (ptr %X , ptr %Y ) #2 {
335335; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
336- ; CHECK-SAME: (i32 [[X_VAL :%.*]], i32 [[Y_VAL :%.*]])
337- ; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL ]], [[Y_VAL ]]
336+ ; CHECK-SAME: (i32 [[X_0_VAL :%.*]], i32 [[Y_0_VAL :%.*]]) #[[ATTR2]] {
337+ ; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL ]], [[Y_0_VAL ]]
338338; CHECK-NEXT: ret i32 [[C]]
339339;
340340 %A = load i32 , ptr %X
@@ -345,11 +345,11 @@ define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal25
345345
346346define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256 (ptr %B ) #2 {
347347; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
348- ; CHECK-SAME: (ptr [[B:%.*]])
349- ; CHECK-NEXT: [[A:%.*]] = alloca i32
350- ; CHECK-NEXT: store i32 1, ptr [[A]]
351- ; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
352- ; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
348+ ; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
349+ ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
350+ ; CHECK-NEXT: store i32 1, ptr [[A]], align 4
351+ ; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
352+ ; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
353353; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
354354; CHECK-NEXT: ret i32 [[C]]
355355;
0 commit comments