Skip to content

Conversation

@gandhi56
Copy link
Contributor

@gandhi56 gandhi56 commented May 1, 2025

Related PR: #134436

@llvmbot
Copy link
Member

llvmbot commented May 1, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Anshil Gandhi (gandhi56)

Changes

Related PR: #134436


Full diff: https://github.com/llvm/llvm-project/pull/138155.diff

1 File Affected:

  • (modified) llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll (+67-25)
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll index 318e55c748f7f..d6b51039d5b44 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll @@ -1,10 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -mattr=+relaxed-buffer-oob-mode -S -o - %s | FileCheck --check-prefixes=CHECK,CHECK-OOB-RELAXED %s ; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck --check-prefixes=CHECK,CHECK-OOB-STRICT %s -; CHECK-LABEL: @merge_v2i32_v2i32( -; CHECK: load <4 x i32> -; CHECK: store <4 x i32> zeroinitializer define amdgpu_kernel void @merge_v2i32_v2i32(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 { +; CHECK-LABEL: define amdgpu_kernel void @merge_v2i32_v2i32( +; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) [[B]], align 4 +; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 2, i32 3> +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4 +; CHECK-NEXT: ret void +; entry: %a.1 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %a, i64 1 %b.1 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %b, i64 1 @@ -18,10 +25,16 @@ entry: ret void } -; CHECK-LABEL: @merge_v1i32_v1i32( -; CHECK: load <2 x i32> -; CHECK: store <2 x i32> zeroinitializer define amdgpu_kernel void @merge_v1i32_v1i32(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 
{ +; CHECK-LABEL: define amdgpu_kernel void @merge_v1i32_v1i32( +; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[B]], align 4 +; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <1 x i32> <i32 1> +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4 +; CHECK-NEXT: ret void +; entry: %a.1 = getelementptr inbounds <1 x i32>, ptr addrspace(1) %a, i64 1 %b.1 = getelementptr inbounds <1 x i32>, ptr addrspace(1) %b, i64 1 @@ -35,12 +48,18 @@ entry: ret void } -; CHECK-LABEL: @no_merge_v3i32_v3i32( -; CHECK: load <3 x i32> -; CHECK: load <3 x i32> -; CHECK: store <3 x i32> zeroinitializer -; CHECK: store <3 x i32> zeroinitializer define amdgpu_kernel void @no_merge_v3i32_v3i32(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 { +; CHECK-LABEL: define amdgpu_kernel void @no_merge_v3i32_v3i32( +; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[A_1:%.*]] = getelementptr inbounds <3 x i32>, ptr addrspace(1) [[A]], i64 1 +; CHECK-NEXT: [[B_1:%.*]] = getelementptr inbounds <3 x i32>, ptr addrspace(1) [[B]], i64 1 +; CHECK-NEXT: [[LD_C:%.*]] = load <3 x i32>, ptr addrspace(1) [[B]], align 4 +; CHECK-NEXT: [[LD_C_IDX_1:%.*]] = load <3 x i32>, ptr addrspace(1) [[B_1]], align 4 +; CHECK-NEXT: store <3 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4 +; CHECK-NEXT: store <3 x i32> zeroinitializer, ptr addrspace(1) [[A_1]], align 4 +; CHECK-NEXT: ret void +; entry: %a.1 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %a, i64 1 %b.1 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %b, i64 1 @@ 
-54,10 +73,16 @@ entry: ret void } -; CHECK-LABEL: @merge_v2i16_v2i16( -; CHECK: load <4 x i16> -; CHECK: store <4 x i16> zeroinitializer define amdgpu_kernel void @merge_v2i16_v2i16(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 { +; CHECK-LABEL: define amdgpu_kernel void @merge_v2i16_v2i16( +; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 4 +; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3> +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr addrspace(1) [[A]], align 4 +; CHECK-NEXT: ret void +; entry: %a.1 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %a, i64 1 %b.1 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %b, i64 1 @@ -71,15 +96,27 @@ entry: ret void } -; CHECK-OOB-RELAXED-LABEL: @merge_fat_ptrs( -; CHECK-OOB-RELAXED: load <4 x i16> -; CHECK-OOB-RELAXED: store <4 x i16> zeroinitializer -; CHECK-OOB-STRICT-LABEL: @merge_fat_ptrs( -; CHECK-OOB-STRICT: load <2 x i16> -; CHECK-OOB-STRICT: load <2 x i16> -; CHECK-OOB-STRICT: store <2 x i16> zeroinitializer -; CHECK-OOB-STRICT: store <2 x i16> zeroinitializer define amdgpu_kernel void @merge_fat_ptrs(ptr addrspace(7) nocapture %a, ptr addrspace(7) nocapture readonly %b) #0 { +; CHECK-OOB-RELAXED-LABEL: define amdgpu_kernel void @merge_fat_ptrs( +; CHECK-OOB-RELAXED-SAME: ptr addrspace(7) captures(none) [[A:%.*]], ptr addrspace(7) readonly captures(none) [[B:%.*]]) #[[ATTR0]] { +; CHECK-OOB-RELAXED-NEXT: [[ENTRY:.*:]] +; CHECK-OOB-RELAXED-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(7) [[B]], align 4 +; CHECK-OOB-RELAXED-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1> +; 
CHECK-OOB-RELAXED-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3> +; CHECK-OOB-RELAXED-NEXT: store <4 x i16> zeroinitializer, ptr addrspace(7) [[A]], align 4 +; CHECK-OOB-RELAXED-NEXT: ret void +; +; CHECK-OOB-STRICT-LABEL: define amdgpu_kernel void @merge_fat_ptrs( +; CHECK-OOB-STRICT-SAME: ptr addrspace(7) captures(none) [[A:%.*]], ptr addrspace(7) readonly captures(none) [[B:%.*]]) #[[ATTR0]] { +; CHECK-OOB-STRICT-NEXT: [[ENTRY:.*:]] +; CHECK-OOB-STRICT-NEXT: [[A_1:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(7) [[A]], i32 1 +; CHECK-OOB-STRICT-NEXT: [[B_1:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(7) [[B]], i32 1 +; CHECK-OOB-STRICT-NEXT: [[LD_C:%.*]] = load <2 x i16>, ptr addrspace(7) [[B]], align 4 +; CHECK-OOB-STRICT-NEXT: [[LD_C_IDX_1:%.*]] = load <2 x i16>, ptr addrspace(7) [[B_1]], align 4 +; CHECK-OOB-STRICT-NEXT: store <2 x i16> zeroinitializer, ptr addrspace(7) [[A]], align 4 +; CHECK-OOB-STRICT-NEXT: store <2 x i16> zeroinitializer, ptr addrspace(7) [[A_1]], align 4 +; CHECK-OOB-STRICT-NEXT: ret void +; entry: %a.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %a, i32 1 %b.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %b, i32 1 @@ -94,10 +131,15 @@ entry: } ; Ideally this would be merged -; CHECK-LABEL: @merge_load_i32_v2i16( -; CHECK: load i32, -; CHECK: load <2 x i16> define amdgpu_kernel void @merge_load_i32_v2i16(ptr addrspace(1) nocapture %a) #0 { +; CHECK-LABEL: define amdgpu_kernel void @merge_load_i32_v2i16( +; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[A_1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[A]], i32 1 +; CHECK-NEXT: [[LD_0:%.*]] = load i32, ptr addrspace(1) [[A]], align 4 +; CHECK-NEXT: [[LD_1:%.*]] = load <2 x i16>, ptr addrspace(1) [[A_1]], align 4 +; CHECK-NEXT: ret void +; entry: %a.1 = getelementptr inbounds i32, ptr addrspace(1) %a, i32 1 
@gandhi56 gandhi56 merged commit 0e9740e into llvm:main May 1, 2025
5 of 9 checks passed
@gandhi56 gandhi56 deleted the precommit-merge-vectors branch May 1, 2025 16:00
@gandhi56 gandhi56 restored the precommit-merge-vectors branch May 1, 2025 16:08
gandhi56 added a commit that referenced this pull request May 1, 2025
gandhi56 added a commit that referenced this pull request May 1, 2025
Autogenerate checks for merge-vectors.ll and introduce merge-vectors-complex.ll with mismatched types. Related PR: #134436 This is a reland of #138155, which was reverted due to missed nits.
IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025
IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025
IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025
Autogenerate checks for merge-vectors.ll and introduce merge-vectors-complex.ll with mismatched types. Related PR: llvm#134436 This is a reland of llvm#138155, which was reverted due to missed nits.
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request May 6, 2025
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request May 6, 2025
Autogenerate checks for merge-vectors.ll and introduce merge-vectors-complex.ll with mismatched types. Related PR: llvm/llvm-project#134436 This is a reland of llvm/llvm-project#138155, which was reverted due to missed nits.
GeorgeARM pushed a commit to GeorgeARM/llvm-project that referenced this pull request May 7, 2025
GeorgeARM pushed a commit to GeorgeARM/llvm-project that referenced this pull request May 7, 2025
GeorgeARM pushed a commit to GeorgeARM/llvm-project that referenced this pull request May 7, 2025
Autogenerate checks for merge-vectors.ll and introduce merge-vectors-complex.ll with mismatched types. Related PR: llvm#134436 This is a reland of llvm#138155, which was reverted due to missed nits.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment