Skip to content

atomic fences cause errors and atomic instructions are emitted without fences on nvptx64-nvidia-cuda #136480

@usamoi

Description

@usamoi

I tried this code:

#![no_std] #![allow(internal_features)] #![feature(core_intrinsics)] use core::sync::atomic::AtomicU32; #[panic_handler] fn panic_handler(_: &core::panic::PanicInfo<'_>) -> ! { loop {} } static COUNTER: AtomicU32 = AtomicU32::new(0); // use intrinsics here, since inliner not work for `AtomicU32::fetch_add` #[no_mangle] fn atomic_xadd_seqcst() { unsafe { core::intrinsics::atomic_xadd_seqcst(COUNTER.as_ptr(), 1); } } #[no_mangle] fn atomic_xadd_acqrel() { unsafe { core::intrinsics::atomic_xadd_acqrel(COUNTER.as_ptr(), 1); } } #[no_mangle] fn atomic_xadd_relaxed() { unsafe { core::intrinsics::atomic_xadd_relaxed(COUNTER.as_ptr(), 1); } }
[unstable] build-std = ["core"] [build] target = "nvptx64-nvidia-cuda" rustflags = [ "-Clinker=llvm-bitcode-linker", "-Clinker-flavor=llbc", "-Zunstable-options", "-Clink-arg=-O3", "-Ctarget-cpu=sm_90", ]

The same instructions are generated for all three functions.

// // Generated by LLVM NVPTX Back-End // .version 7.8 .target sm_90 .address_size 64	// .globl	atomic_xadd_seqcst // -- Begin function atomic_xadd_seqcst .global .align 4 .b8 _ZN2nv7COUNTER17h4c11a16573192bf7E[4]; // @atomic_xadd_seqcst .visible .func atomic_xadd_seqcst() {	.reg .b32	%r<2>;	.reg .b64	%rd<2>; // %bb.0:	mov.u64	%rd1, _ZN2nv7COUNTER17h4c11a16573192bf7E;	atom.global.add.u32	%r1, [%rd1], 1;	ret; // -- End function }	// .globl	atomic_xadd_acqrel // -- Begin function atomic_xadd_acqrel .visible .func atomic_xadd_acqrel() // @atomic_xadd_acqrel {	.reg .b32	%r<2>;	.reg .b64	%rd<2>; // %bb.0:	mov.u64	%rd1, _ZN2nv7COUNTER17h4c11a16573192bf7E;	atom.global.add.u32	%r1, [%rd1], 1;	ret; // -- End function }	// .globl	atomic_xadd_relaxed // -- Begin function atomic_xadd_relaxed .visible .func atomic_xadd_relaxed() // @atomic_xadd_relaxed {	.reg .b32	%r<2>;	.reg .b64	%rd<2>; // %bb.0:	mov.u64	%rd1, _ZN2nv7COUNTER17h4c11a16573192bf7E;	atom.global.add.u32	%r1, [%rd1], 1;	ret; // -- End function } 

Different instructions should be emitted for the different memory orderings.

Link for PTX document:

Parallel Synchronization and Communication Instructions: atom

Meta

rustc --version --verbose:

rustc 1.86.0-nightly (8239a37f9 2025-02-01) binary: rustc commit-hash: 8239a37f9c0951a037cfc51763ea52a20e71e6bd commit-date: 2025-02-01 host: x86_64-unknown-linux-gnu release: 1.86.0-nightly LLVM version: 19.1.7 

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-atomic (Area: Atomics, barriers, and sync primitives) · C-bug (Category: This is a bug.) · O-NVPTX (Target: the NVPTX LLVM backend for running rust on GPUs, https://llvm.org/docs/NVPTXUsage.html) · T-compiler (Relevant to the compiler team, which will review and decide on the PR/issue.)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions