@@ -437,4 +437,64 @@ exit:
437437 ret void
438438}
439439
440+ ; FIXME: We should interleave by 2 after narrowing interleave groups to saturate
441+ ; load/store units.
; Test input: each loop iteration handles four consecutive floats. It loads
; x[iv], x[iv+1], x[iv+2] and x[iv+3], negates each value, and stores the
; results to the same indices of y (y is marked noalias).
; The expected output embedded below contains a single <4 x float> load, fneg
; and store per vector-loop iteration, with the index stepping by 1 and being
; multiplied by 4 to form the element offset.
; %n is not referenced anywhere in the function body.
; NOTE(review): this definition does not reference any attribute group, while
; an `attributes #0` group (target-cpu) is declared later in the file; confirm
; the omission is intended.
442+ define void @test_interleave_after_narrowing (i32 %n , ptr %x , ptr noalias %y ) {
443+ ; CHECK-LABEL: define void @test_interleave_after_narrowing(
444+ ; CHECK-SAME: i32 [[N:%.*]], ptr [[X:%.*]], ptr noalias [[Y:%.*]]) {
445+ ; CHECK-NEXT: [[ENTRY:.*:]]
446+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
447+ ; CHECK: [[VECTOR_PH]]:
448+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
449+ ; CHECK: [[VECTOR_BODY]]:
450+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
451+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
452+ ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[OFFSET_IDX]]
453+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
454+ ; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
455+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[OFFSET_IDX]]
456+ ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP2]], align 4
457+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
458+ ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
459+ ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
460+ ; CHECK: [[MIDDLE_BLOCK]]:
461+ ; CHECK-NEXT: br [[EXIT:label %.*]]
462+ ; CHECK: [[SCALAR_PH]]:
463+ ;
464+ entry:
465+ br label %loop
466+
467+ loop:
; iv starts at 0 and advances by 4 each iteration. The exit test at the bottom
; runs after the body, so the body executes for iv = 0, 4, ..., 1020, i.e. 256
; times, which matches the constant 256 in the expected output above.
468+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
; Element 0: y[iv] = -x[iv]
469+ %gep.x = getelementptr inbounds nuw float , ptr %x , i64 %iv
470+ %l.x = load float , ptr %gep.x , align 4
471+ %neg.0 = fneg float %l.x
472+ %gep.y = getelementptr inbounds nuw float , ptr %y , i64 %iv
473+ store float %neg.0 , ptr %gep.y , align 4
; Element 1: y[iv+1] = -x[iv+1]. iv is always a multiple of 4, so its two low
; bits are clear and the disjoint or with 1, 2 or 3 yields iv+1, iv+2, iv+3.
474+ %iv.1 = or disjoint i64 %iv , 1
475+ %gep.x.1 = getelementptr inbounds nuw float , ptr %x , i64 %iv.1
476+ %l.x.1 = load float , ptr %gep.x.1 , align 4
477+ %neg.1 = fneg float %l.x.1
478+ %gep.y.1 = getelementptr inbounds nuw float , ptr %y , i64 %iv.1
479+ store float %neg.1 , ptr %gep.y.1 , align 4
; Element 2: y[iv+2] = -x[iv+2]
480+ %iv.2 = or disjoint i64 %iv , 2
481+ %gep.x.2 = getelementptr inbounds nuw float , ptr %x , i64 %iv.2
482+ %l.x.2 = load float , ptr %gep.x.2 , align 4
483+ %neg.2 = fneg float %l.x.2
484+ %gep.y.2 = getelementptr inbounds nuw float , ptr %y , i64 %iv.2
485+ store float %neg.2 , ptr %gep.y.2 , align 4
; Element 3: y[iv+3] = -x[iv+3]
486+ %iv.3 = or disjoint i64 %iv , 3
487+ %gep.x.3 = getelementptr inbounds nuw float , ptr %x , i64 %iv.3
488+ %l.x.3 = load float , ptr %gep.x.3 , align 4
489+ %neg.3 = fneg float %l.x.3
490+ %gep.y.3 = getelementptr inbounds nuw float , ptr %y , i64 %iv.3
491+ store float %neg.3 , ptr %gep.y.3 , align 4
492+ %iv.next = add nuw nsw i64 %iv , 4
; The comparison uses the pre-increment %iv rather than %iv.next, so the
; iteration with iv == 1020 still runs its body before the loop exits.
493+ %ec = icmp samesign ult i64 %iv , 1020
494+ br i1 %ec , label %loop , label %exit
495+
496+ exit:
497+ ret void
498+ }
499+
; Attribute group #0: sets the "target-cpu" string attribute to neoverse-v2.
440500attributes #0 = { "target-cpu" ="neoverse-v2" }
0 commit comments