Skip to content

Commit 985dc69

Browse files
committed
[LV] Add test for missed interleaving after narrowing interleave groups.
Add extra test coverage for #149706. The added loop should be interleaved after its interleave groups are narrowed, which requires moving the narrowing transform earlier.
1 parent c193d71 commit 985dc69

File tree

1 file changed

+60
-0
lines changed

1 file changed

+60
-0
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,4 +437,64 @@ exit:
437437
ret void
438438
}
439439

440+
; FIXME: We should interleave by 2 after narrowing interleave groups to saturate
441+
; load/store units.
;
; Each scalar iteration negates four consecutive floats read from %x and stores
; the results at the same offsets in %y. %iv advances by 4 and the exit test is
; on the pre-increment %iv, so the body runs for %iv = 0, 4, ..., 1020, i.e.
; 256 times. The expected output below contains a single <4 x float>
; load/fneg/store per vector iteration, with the vector index stepping by 1 up
; to 256 and the element offset computed as index * 4.
; The %n argument is not used by the loop.
442+
define void @test_interleave_after_narrowing(i32 %n, ptr %x, ptr noalias %y) {
443+
; CHECK-LABEL: define void @test_interleave_after_narrowing(
444+
; CHECK-SAME: i32 [[N:%.*]], ptr [[X:%.*]], ptr noalias [[Y:%.*]]) {
445+
; CHECK-NEXT: [[ENTRY:.*:]]
446+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
447+
; CHECK: [[VECTOR_PH]]:
448+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
449+
; CHECK: [[VECTOR_BODY]]:
450+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
451+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
452+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[OFFSET_IDX]]
453+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
454+
; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
455+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[OFFSET_IDX]]
456+
; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP2]], align 4
457+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
458+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
459+
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
460+
; CHECK: [[MIDDLE_BLOCK]]:
461+
; CHECK-NEXT: br [[EXIT:label %.*]]
462+
; CHECK: [[SCALAR_PH]]:
463+
;
464+
entry:
465+
br label %loop
466+
467+
loop:
468+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Element 0 of the group: y[iv] = -x[iv].
469+
%gep.x = getelementptr inbounds nuw float, ptr %x, i64 %iv
470+
%l.x = load float, ptr %gep.x, align 4
471+
%neg.0 = fneg float %l.x
472+
%gep.y = getelementptr inbounds nuw float, ptr %y, i64 %iv
473+
store float %neg.0, ptr %gep.y, align 4
; %iv starts at 0 and steps by 4, so it is always a multiple of 4 and its low
; two bits are zero; `or disjoint` with 1, 2 and 3 therefore addresses the
; three elements that follow element %iv.
474+
%iv.1 = or disjoint i64 %iv, 1
475+
%gep.x.1 = getelementptr inbounds nuw float, ptr %x, i64 %iv.1
476+
%l.x.1 = load float, ptr %gep.x.1, align 4
477+
%neg.1 = fneg float %l.x.1
478+
%gep.y.1 = getelementptr inbounds nuw float, ptr %y, i64 %iv.1
479+
store float %neg.1, ptr %gep.y.1, align 4
480+
%iv.2 = or disjoint i64 %iv, 2
481+
%gep.x.2 = getelementptr inbounds nuw float, ptr %x, i64 %iv.2
482+
%l.x.2 = load float, ptr %gep.x.2, align 4
483+
%neg.2 = fneg float %l.x.2
484+
%gep.y.2 = getelementptr inbounds nuw float, ptr %y, i64 %iv.2
485+
store float %neg.2, ptr %gep.y.2, align 4
486+
%iv.3 = or disjoint i64 %iv, 3
487+
%gep.x.3 = getelementptr inbounds nuw float, ptr %x, i64 %iv.3
488+
%l.x.3 = load float, ptr %gep.x.3, align 4
489+
%neg.3 = fneg float %l.x.3
490+
%gep.y.3 = getelementptr inbounds nuw float, ptr %y, i64 %iv.3
491+
store float %neg.3, ptr %gep.y.3, align 4
492+
%iv.next = add nuw nsw i64 %iv, 4
; The compare uses %iv rather than %iv.next: the iteration with %iv == 1020
; still executes its body and then leaves the loop.
493+
%ec = icmp samesign ult i64 %iv, 1020
494+
br i1 %ec, label %loop, label %exit
495+
496+
exit:
497+
ret void
498+
}
499+
440500
; Attribute group #0: sets the "target-cpu" string attribute to neoverse-v2 on
; any function that references #0.
; NOTE(review): @test_interleave_after_narrowing is defined without #0 in this
; diff -- confirm the target CPU is supplied some other way (e.g. via the RUN
; line, which is not visible here).
attributes #0 = { "target-cpu"="neoverse-v2" }

0 commit comments

Comments
 (0)