- Notifications
You must be signed in to change notification settings - Fork 15.3k
[AMDGPU] Fix setting nontemporal in memory legalizer #83815
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Fix setting nontemporal in memory legalizer #83815
Conversation
|
|
| @llvm/pr-subscribers-backend-amdgpu Author: Mirko Brkušanin (mbrkusanin) Changes: Iterator MI can advance in insertWait() but we need original instruction to set temporal hint. Just move it before handling volatile. Patch is 1.84 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83815.diff 24 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 4069a368f68719..73c23f0f987c3d 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -2392,6 +2392,11 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal( bool Changed = false; + if (IsNonTemporal) { + // Set non-temporal hint for all cache levels. + Changed |= setTH(MI, AMDGPU::CPol::TH_NT); + } + if (IsVolatile) { Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS); @@ -2407,11 +2412,6 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal( Position::AFTER); } - if (IsNonTemporal) { - // Set non-temporal hint for all cache levels. - Changed |= setTH(MI, AMDGPU::CPol::TH_NT); - } - return Changed; } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll index 77962fadcacfc6..e13542f61474e2 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll @@ -10,6 +10,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s define amdgpu_kernel void @singlethread_acquire_fence() { ; GFX6-LABEL: singlethread_acquire_fence: @@ -55,6 +57,14 @@ define amdgpu_kernel void @singlethread_acquire_fence() { ; GFX11-CU-LABEL: singlethread_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: singlethread_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; 
GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: singlethread_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("singlethread") acquire ret void @@ -104,6 +114,14 @@ define amdgpu_kernel void @singlethread_release_fence() { ; GFX11-CU-LABEL: singlethread_release_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: singlethread_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: singlethread_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("singlethread") release ret void @@ -153,6 +171,14 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() { ; GFX11-CU-LABEL: singlethread_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: singlethread_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: singlethread_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("singlethread") acq_rel ret void @@ -202,6 +228,14 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() { ; GFX11-CU-LABEL: singlethread_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: singlethread_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: singlethread_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("singlethread") seq_cst ret void @@ -251,6 +285,14 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() { ; GFX11-CU-LABEL: singlethread_one_as_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: singlethread_one_as_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: singlethread_one_as_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence 
syncscope("singlethread-one-as") acquire ret void @@ -300,6 +342,14 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() { ; GFX11-CU-LABEL: singlethread_one_as_release_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: singlethread_one_as_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: singlethread_one_as_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("singlethread-one-as") release ret void @@ -349,6 +399,14 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() { ; GFX11-CU-LABEL: singlethread_one_as_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: singlethread_one_as_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: singlethread_one_as_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("singlethread-one-as") acq_rel ret void @@ -398,6 +456,14 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() { ; GFX11-CU-LABEL: singlethread_one_as_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: singlethread_one_as_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: singlethread_one_as_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("singlethread-one-as") seq_cst ret void @@ -447,6 +513,14 @@ define amdgpu_kernel void @wavefront_acquire_fence() { ; GFX11-CU-LABEL: wavefront_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: wavefront_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: wavefront_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("wavefront") acquire ret void @@ -496,6 +570,14 @@ define 
amdgpu_kernel void @wavefront_release_fence() { ; GFX11-CU-LABEL: wavefront_release_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: wavefront_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: wavefront_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("wavefront") release ret void @@ -545,6 +627,14 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() { ; GFX11-CU-LABEL: wavefront_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: wavefront_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: wavefront_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("wavefront") acq_rel ret void @@ -594,6 +684,14 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() { ; GFX11-CU-LABEL: wavefront_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: wavefront_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: wavefront_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("wavefront") seq_cst ret void @@ -643,6 +741,14 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() { ; GFX11-CU-LABEL: wavefront_one_as_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: wavefront_one_as_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: wavefront_one_as_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("wavefront-one-as") acquire ret void @@ -692,6 +798,14 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() { ; GFX11-CU-LABEL: wavefront_one_as_release_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: 
wavefront_one_as_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: wavefront_one_as_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("wavefront-one-as") release ret void @@ -741,6 +855,14 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() { ; GFX11-CU-LABEL: wavefront_one_as_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: wavefront_one_as_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: wavefront_one_as_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("wavefront-one-as") acq_rel ret void @@ -790,6 +912,14 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() { ; GFX11-CU-LABEL: wavefront_one_as_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: wavefront_one_as_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: wavefront_one_as_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("wavefront-one-as") seq_cst ret void @@ -843,6 +973,15 @@ define amdgpu_kernel void @workgroup_acquire_fence() { ; GFX11-CU-LABEL: workgroup_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: workgroup_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: workgroup_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("workgroup") acquire ret void @@ -892,6 +1031,14 @@ define amdgpu_kernel void @workgroup_release_fence() { ; GFX11-CU-LABEL: workgroup_release_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: workgroup_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; 
GFX12-CU-LABEL: workgroup_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("workgroup") release ret void @@ -945,6 +1092,15 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; GFX11-CU-LABEL: workgroup_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: workgroup_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: workgroup_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("workgroup") acq_rel ret void @@ -998,6 +1154,15 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; GFX11-CU-LABEL: workgroup_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: workgroup_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: workgroup_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("workgroup") seq_cst ret void @@ -1051,6 +1216,15 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() { ; GFX11-CU-LABEL: workgroup_one_as_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: workgroup_one_as_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: workgroup_one_as_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("workgroup-one-as") acquire ret void @@ -1100,6 +1274,14 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; GFX11-CU-LABEL: workgroup_one_as_release_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: workgroup_one_as_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: 
workgroup_one_as_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("workgroup-one-as") release ret void @@ -1153,6 +1335,15 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: workgroup_one_as_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("workgroup-one-as") acq_rel ret void @@ -1206,6 +1397,15 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: workgroup_one_as_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("workgroup-one-as") seq_cst ret void @@ -1269,6 +1469,16 @@ define amdgpu_kernel void @agent_acquire_fence() { ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: agent_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: agent_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("agent") acquire ret void @@ -1320,6 +1530,14 @@ define amdgpu_kernel void @agent_release_fence() { ; GFX11-CU-LABEL: agent_release_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: agent_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: 
agent_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("agent") release ret void @@ -1385,6 +1603,16 @@ define amdgpu_kernel void @agent_acq_rel_fence() { ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: agent_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: agent_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("agent") acq_rel ret void @@ -1450,6 +1678,16 @@ define amdgpu_kernel void @agent_seq_cst_fence() { ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: agent_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: agent_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("agent") seq_cst ret void @@ -1513,6 +1751,16 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() { ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: agent_one_as_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: agent_one_as_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("agent-one-as") acquire ret void @@ -1564,6 +1812,14 @@ define amdgpu_kernel void @agent_one_as_release_fence() { ; GFX11-CU-LABEL: agent_one_as_release_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: agent_one_as_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: 
agent_one_as_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("agent-one-as") release ret void @@ -1629,6 +1885,16 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() { ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: agent_one_as_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: agent_one_as_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("agent-one-as") acq_rel ret void @@ -1694,6 +1960,16 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() { ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: agent_one_as_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: agent_one_as_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm entry: fence syncscope("agent-one-as") seq_cst ret void @@ -1759,6 +2035,16 @@ define amdgpu_kernel void @system_acquire_fence() { ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: system_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: system_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS +; GFX12-CU-NEXT: s_endpgm entry: fence acquire ret void @@ -1812,6 +2098,14 @@ define amdgpu_kernel void @system_release_fence() { ; GFX11-CU-LABEL: system_release_fence: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: system_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_endpgm +; +; 
GFX12-CU-LABEL: system_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_endpgm entry: fence release ret void @@ -1881,6 +2175,16 @@ define amdgpu_kernel void @system_acq_rel_fence() { ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: system_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: system_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS +; GFX12-CU-NEXT: s_endpgm entry: fence acq_rel ret void @@ -1950,6 +2254,16 @@ define amdgpu_kernel void @system_seq_cst_fence() { ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: system_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: system_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS +; GFX12-CU-NEXT: s_endpgm e... [truncated] |
Iterator MI can advance in insertWait() but we need original instruction to set temporal hint. Just move it before handling volatile.
c5f3f7c to febe0d9 Compare | Rebase |
| I wonder if this is worth backporting to the LLVM 18 release branch. |
Iterator MI can advance in insertWait() but we need original instruction to set temporal hint. Just move it before handling volatile.
Iterator MI can advance in insertWait() but we need original instruction to set temporal hint. Just move it before handling volatile.
Iterator MI can advance in insertWait() but we need original instruction to
set temporal hint. Just move it before handling volatile.