@@ -18140,9 +18140,35 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
     break;
   }
 
+  // Some of the atomic builtins take the scope as a string name.
   StringRef scp;
-  llvm::getConstantStringInfo(Scope, scp);
-  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+  if (llvm::getConstantStringInfo(Scope, scp)) {
+    SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+    return;
+  }
+
+  // Older builtins had an enum argument for the memory scope.
+  int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
+  switch (scope) {
+  case 0: // __MEMORY_SCOPE_SYSTEM
+    SSID = llvm::SyncScope::System;
+    break;
+  case 1: // __MEMORY_SCOPE_DEVICE
+    SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+    break;
+  case 2: // __MEMORY_SCOPE_WRKGRP
+    SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
+    break;
+  case 3: // __MEMORY_SCOPE_WVFRNT
+    SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+    break;
+  case 4: // __MEMORY_SCOPE_SINGLE
+    SSID = llvm::SyncScope::SingleThread;
+    break;
+  default:
+    SSID = llvm::SyncScope::System;
+    break;
+  }
 }
 
 llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
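
For context, a minimal sketch (not part of the patch) of the two scope-argument styles ProcessOrderScopeAMDGCN now handles, assuming an amdgcn target, Clang's address_space attribute for LDS pointers, and the predefined __MEMORY_SCOPE_* macros:

```c++
// Hypothetical callers, not from the patch.
void scope_styles(__attribute__((address_space(3))) float *lds, float v) {
  // String scope: getConstantStringInfo() succeeds and "agent" is used as-is.
  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");

  // Enum scope: the new switch maps __MEMORY_SCOPE_DEVICE (1) to "agent".
  __builtin_amdgcn_ds_faddf(lds, v, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE,
                            /*volatile=*/false);
}
```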
@@ -18558,14 +18584,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
     return Builder.CreateCall(F, { Src0, Builder.getFalse() });
   }
-  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
   case AMDGPU::BI__builtin_amdgcn_ds_fminf:
   case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
     Intrinsic::ID Intrin;
     switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
-      Intrin = Intrinsic::amdgcn_ds_fadd;
-      break;
     case AMDGPU::BI__builtin_amdgcn_ds_fminf:
       Intrin = Intrinsic::amdgcn_ds_fmin;
       break;
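
A sketch (again not from the patch) of the flavors that stay on this intrinsic path, assuming __builtin_amdgcn_ds_fminf keeps its (ptr, value, ordering, scope, isVolatile) signature; only ds_faddf moves to the generic atomicrmw lowering further down:

```c++
// Hypothetical caller, not from the patch: still lowers through
// CGM.getIntrinsic(Intrinsic::amdgcn_ds_fmin).
void lds_min(__attribute__((address_space(3))) float *lds, float v) {
  __builtin_amdgcn_ds_fminf(lds, v, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_WRKGRP,
                            /*volatile=*/false);
}
```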
@@ -18656,35 +18678,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
     return Builder.CreateCall(F, {Addr, Val});
   }
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
-    Intrinsic::ID IID;
-    llvm::Type *ArgTy;
-    switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
-      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
-      IID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
-      ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
-      IID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
-      ArgTy = llvm::FixedVectorType::get(
-          llvm::Type::getHalfTy(getLLVMContext()), 2);
-      IID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    }
-    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
-    llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
-        llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
-    llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
-        llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
-    llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
-    return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
-  }
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
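
With the hand-rolled llvm.amdgcn.ds.fadd call gone, the same source should now flow into the atomicrmw case extended below. A sketch of the expected effect (not from the patch; the exact IR spelling is an assumption):

```c++
// Hypothetical caller, not from the patch. Expected lowering is now roughly
//   %r = atomicrmw fadd ptr addrspace(3) %lds, float %v seq_cst, align 4
// instead of a call to @llvm.amdgcn.ds.fadd with zero offset/isVolatile
// operands.
float lds_fadd(__attribute__((address_space(3))) float *lds, float v) {
  return __builtin_amdgcn_ds_atomic_fadd_f32(lds, v);
}
```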
@@ -19044,7 +19037,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
   case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
   case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
-  case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: {
     llvm::AtomicRMWInst::BinOp BinOp;
     switch (BuiltinID) {
     case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19055,23 +19053,54 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
       BinOp = llvm::AtomicRMWInst::UDecWrap;
       break;
+    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
+      BinOp = llvm::AtomicRMWInst::FAdd;
+      break;
     }
 
     Address Ptr = CheckAtomicAlignment(*this, E);
     Value *Val = EmitScalarExpr(E->getArg(1));
+    llvm::Type *OrigTy = Val->getType();
+    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
 
-    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
-                            EmitScalarExpr(E->getArg(3)), AO, SSID);
+    bool Volatile;
 
-    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
-    bool Volatile =
-        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+    if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf) {
+      // __builtin_amdgcn_ds_faddf has an explicit volatile argument.
+      Volatile =
+          cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
+    } else {
+      // Infer volatile from the passed type.
+      Volatile =
+          PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+    }
+
+    if (E->getNumArgs() >= 4) {
+      // Some of the builtins have explicit ordering and scope arguments.
+      ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
+                              EmitScalarExpr(E->getArg(3)), AO, SSID);
+    } else {
+      // The ds_fadd_* builtins do not have syncscope/order arguments.
+      SSID = llvm::SyncScope::System;
+      AO = AtomicOrdering::SequentiallyConsistent;
+
+      // The v2bf16 builtin uses i16 instead of a natural bfloat type.
+      if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
+        llvm::Type *V2BF16Ty = FixedVectorType::get(
+            llvm::Type::getBFloatTy(Builder.getContext()), 2);
+        Val = Builder.CreateBitCast(Val, V2BF16Ty);
+      }
+    }
 
     llvm::AtomicRMWInst *RMW =
         Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
     if (Volatile)
       RMW->setVolatile(true);
-    return RMW;
+    return Builder.CreateBitCast(RMW, OrigTy);
   }
   case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
   case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
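
A sketch of the v2bf16 corner case handled above, assuming the builtin's source-level operands are <2 x i16> (per the patch's "no natural bfloat type" comment): the value is bitcast to <2 x bfloat> for the atomicrmw, and the final CreateBitCast restores the caller-visible type.

```c++
// Hypothetical caller, not from the patch.
typedef short short2 __attribute__((ext_vector_type(2)));

short2 lds_fadd_bf16(__attribute__((address_space(3))) short2 *lds, short2 v) {
  // Expected to emit an atomicrmw fadd on <2 x bfloat>, then a bitcast
  // back to <2 x i16> for the return value.
  return __builtin_amdgcn_ds_atomic_fadd_v2bf16(lds, v);
}
```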