- Notifications
You must be signed in to change notification settings - Fork 14.1k
Open
Labels
A-atomic — Area: Atomics, barriers, and sync primitives
C-bug — Category: This is a bug.
O-NVPTX — Target: the NVPTX LLVM backend for running rust on GPUs, https://llvm.org/docs/NVPTXUsage.html
T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.
Description
I tried this code:
```rust
#![no_std]
#![allow(internal_features)]
#![feature(core_intrinsics)]

use core::sync::atomic::AtomicU32;

#[panic_handler]
fn panic_handler(_: &core::panic::PanicInfo<'_>) -> ! {
    loop {}
}

static COUNTER: AtomicU32 = AtomicU32::new(0);

// use intrinsics here, since the inliner does not work for `AtomicU32::fetch_add`
#[no_mangle]
fn atomic_xadd_seqcst() {
    unsafe {
        core::intrinsics::atomic_xadd_seqcst(COUNTER.as_ptr(), 1);
    }
}

#[no_mangle]
fn atomic_xadd_acqrel() {
    unsafe {
        core::intrinsics::atomic_xadd_acqrel(COUNTER.as_ptr(), 1);
    }
}

#[no_mangle]
fn atomic_xadd_relaxed() {
    unsafe {
        core::intrinsics::atomic_xadd_relaxed(COUNTER.as_ptr(), 1);
    }
}
```

Cargo configuration:

```toml
[unstable]
build-std = ["core"]

[build]
target = "nvptx64-nvidia-cuda"
rustflags = [
    "-Clinker=llvm-bitcode-linker",
    "-Clinker-flavor=llbc",
    "-Zunstable-options",
    "-Clink-arg=-O3",
    "-Ctarget-cpu=sm_90",
]
```

The same instructions are generated for all three functions:
```
//
// Generated by LLVM NVPTX Back-End
//

.version 7.8
.target sm_90
.address_size 64

	// .globl	atomic_xadd_seqcst      // -- Begin function atomic_xadd_seqcst
.global .align 4 .b8 _ZN2nv7COUNTER17h4c11a16573192bf7E[4];
                                        // @atomic_xadd_seqcst
.visible .func atomic_xadd_seqcst()
{
	.reg .b32 	%r<2>;
	.reg .b64 	%rd<2>;

// %bb.0:
	mov.u64 	%rd1, _ZN2nv7COUNTER17h4c11a16573192bf7E;
	atom.global.add.u32 	%r1, [%rd1], 1;
	ret;
                                        // -- End function
}
	// .globl	atomic_xadd_acqrel      // -- Begin function atomic_xadd_acqrel
.visible .func atomic_xadd_acqrel()     // @atomic_xadd_acqrel
{
	.reg .b32 	%r<2>;
	.reg .b64 	%rd<2>;

// %bb.0:
	mov.u64 	%rd1, _ZN2nv7COUNTER17h4c11a16573192bf7E;
	atom.global.add.u32 	%r1, [%rd1], 1;
	ret;
                                        // -- End function
}
	// .globl	atomic_xadd_relaxed     // -- Begin function atomic_xadd_relaxed
.visible .func atomic_xadd_relaxed()    // @atomic_xadd_relaxed
{
	.reg .b32 	%r<2>;
	.reg .b64 	%rd<2>;

// %bb.0:
	mov.u64 	%rd1, _ZN2nv7COUNTER17h4c11a16573192bf7E;
	atom.global.add.u32 	%r1, [%rd1], 1;
	ret;
                                        // -- End function
}
```

Different instructions should be emitted for the three functions.
Link to the PTX documentation:
Parallel Synchronization and Communication Instructions: atom
Meta
rustc --version --verbose:
```
rustc 1.86.0-nightly (8239a37f9 2025-02-01)
binary: rustc
commit-hash: 8239a37f9c0951a037cfc51763ea52a20e71e6bd
commit-date: 2025-02-01
host: x86_64-unknown-linux-gnu
release: 1.86.0-nightly
LLVM version: 19.1.7
```
Metadata
Assignees
Labels
A-atomic — Area: Atomics, barriers, and sync primitives
C-bug — Category: This is a bug.
O-NVPTX — Target: the NVPTX LLVM backend for running rust on GPUs, https://llvm.org/docs/NVPTXUsage.html
T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.