9595#include < algorithm>
9696
9797#define DEBUG_TYPE " amdgpu-sw-lower-lds"
98+ #define COV5_HIDDEN_DYN_LDS_SIZE_ARG 15
9899
99100using namespace llvm ;
100101using namespace AMDGPU ;
@@ -153,6 +154,10 @@ class AMDGPUSwLowerLDS {
153154 void lowerNonKernelLDSAccesses (Function *Func,
154155 SetVector<GlobalVariable *> &LDSGlobals,
155156 NonKernelLDSParameters &NKLDSParams);
157+ void
158+ updateMallocSizeForDynamicLDS (Function *Func, Value *CurrMallocSize,
159+ Value *HiddenDynLDSSize,
160+ SetVector<GlobalVariable *> &DynamicLDSGlobals);
156161
157162private:
158163 Module &M;
@@ -195,7 +200,6 @@ SetVector<Function *> AMDGPUSwLowerLDS::getOrderedIndirectLDSAccessingKernels(
195200 Function *Func = OrderedKernels[i];
196201 Func->setMetadata (" llvm.amdgcn.lds.kernel.id" ,
197202 MDNode::get (Ctx, AttrMDArgs));
198- auto &LDSParams = KernelToLDSParametersMap[Func];
199203 }
200204 return std::move (OrderedKernels);
201205}
@@ -232,6 +236,9 @@ void AMDGPUSwLowerLDS::populateSwLDSGlobal(Function *Func) {
232236 M, IRB.getPtrTy (), false , GlobalValue::InternalLinkage,
233237 PoisonValue::get (IRB.getPtrTy ()), " llvm.amdgcn.sw.lds." + Func->getName (),
234238 nullptr , GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false );
239+ GlobalValue::SanitizerMetadata MD;
240+ MD.NoAddress = true ;
241+ LDSParams.SwLDS ->setSanitizerMetadata (MD);
235242 return ;
236243}
237244
@@ -265,7 +272,7 @@ void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {
265272 // {StartOffset, AlignedSizeInBytes}
266273 SmallString<128 > MDItemStr;
267274 raw_svector_ostream MDItemOS (MDItemStr);
268- MDItemOS << " llvm.amdgcn.sw.lds." << Func->getName (). str () << " .md.item" ;
275+ MDItemOS << " llvm.amdgcn.sw.lds." << Func->getName () << " .md.item" ;
269276
270277 StructType *LDSItemTy =
271278 StructType::create (Ctx, {Int32Ty, Int32Ty, Int32Ty}, MDItemOS.str ());
@@ -296,13 +303,13 @@ void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {
296303
297304 SmallString<128 > MDTypeStr;
298305 raw_svector_ostream MDTypeOS (MDTypeStr);
299- MDTypeOS << " llvm.amdgcn.sw.lds." << Func->getName (). str () << " .md.type" ;
306+ MDTypeOS << " llvm.amdgcn.sw.lds." << Func->getName () << " .md.type" ;
300307
301308 StructType *MetadataStructType =
302309 StructType::create (Ctx, Items, MDTypeOS.str ());
303310 SmallString<128 > MDStr;
304311 raw_svector_ostream MDOS (MDStr);
305- MDOS << " llvm.amdgcn.sw.lds." << Func->getName (). str () << " .md" ;
312+ MDOS << " llvm.amdgcn.sw.lds." << Func->getName () << " .md" ;
306313 LDSParams.SwLDSMetadata = new GlobalVariable (
307314 M, MetadataStructType, false , GlobalValue::InternalLinkage,
308315 PoisonValue::get (MetadataStructType), MDOS.str (), nullptr ,
@@ -387,8 +394,7 @@ void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) {
387394 Value *Load = IRB.CreateLoad (Int32Ty, GEP);
388395 Value *BasePlusOffset =
389396 IRB.CreateInBoundsGEP (IRB.getInt8Ty (), SwLDS, {Load});
390- LLVM_DEBUG (dbgs () << " Sw LDS Lowering, Replacing LDS "
391- << GV->getName ().str ());
397+ LLVM_DEBUG (dbgs () << " Sw LDS Lowering, Replacing LDS " << GV->getName ());
392398 replacesUsesOfGlobalInFunction (Func, GV, BasePlusOffset);
393399 }
394400 };
@@ -398,10 +404,57 @@ void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) {
398404 ReplaceLDSGlobalUses (IndirectAccess.DynamicLDSGlobals );
399405}
400406
407+ void AMDGPUSwLowerLDS::updateMallocSizeForDynamicLDS (
408+ Function *Func, Value *CurrMallocSize, Value *HiddenDynLDSSize,
409+ SetVector<GlobalVariable *> &DynamicLDSGlobals) {
410+ auto &LDSParams = KernelToLDSParametersMap[Func];
411+ Type *Int32Ty = IRB.getInt32Ty ();
412+
413+ GlobalVariable *SwLDS = LDSParams.SwLDS ;
414+ GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata ;
415+ assert (SwLDS && SwLDSMetadata);
416+ StructType *MetadataStructType =
417+ cast<StructType>(SwLDSMetadata->getValueType ());
418+ unsigned MaxAlignment = SwLDS->getAlignment ();
419+ Value *MaxAlignValue = IRB.getInt32 (MaxAlignment);
420+ Value *MaxAlignValueMinusOne = IRB.getInt32 (MaxAlignment - 1 );
421+
422+ for (GlobalVariable *DynGV : DynamicLDSGlobals) {
423+ auto &Indices = LDSParams.LDSToReplacementIndicesMap [DynGV];
424+ // Update the Offset metadata.
425+ Constant *Index0 = ConstantInt::get (Int32Ty, 0 );
426+ Constant *Index1 = ConstantInt::get (Int32Ty, Indices[1 ]);
427+
428+ Constant *Index2Offset = ConstantInt::get (Int32Ty, 0 );
429+ auto *GEPForOffset = IRB.CreateInBoundsGEP (
430+ MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2Offset});
431+
432+ IRB.CreateStore (CurrMallocSize, GEPForOffset);
433+ // Update the size and Aligned Size metadata.
434+ Constant *Index2Size = ConstantInt::get (Int32Ty, 1 );
435+ auto *GEPForSize = IRB.CreateInBoundsGEP (MetadataStructType, SwLDSMetadata,
436+ {Index0, Index1, Index2Size});
437+
438+ Value *CurrDynLDSSize = IRB.CreateLoad (Int32Ty, HiddenDynLDSSize);
439+ IRB.CreateStore (CurrDynLDSSize, GEPForSize);
440+ Constant *Index2AlignedSize = ConstantInt::get (Int32Ty, 1 );
441+ auto *GEPForAlignedSize = IRB.CreateInBoundsGEP (
442+ MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2AlignedSize});
443+
444+ Value *AlignedDynLDSSize =
445+ IRB.CreateAdd (CurrDynLDSSize, MaxAlignValueMinusOne);
446+ AlignedDynLDSSize = IRB.CreateUDiv (AlignedDynLDSSize, MaxAlignValue);
447+ AlignedDynLDSSize = IRB.CreateMul (AlignedDynLDSSize, MaxAlignValue);
448+ IRB.CreateStore (AlignedDynLDSSize, GEPForAlignedSize);
449+
450+ // Update the Current Malloc Size
451+ CurrMallocSize = IRB.CreateAdd (CurrMallocSize, AlignedDynLDSSize);
452+ }
453+ }
454+
401455void AMDGPUSwLowerLDS::lowerKernelLDSAccesses (Function *Func,
402456 DomTreeUpdater &DTU) {
403- LLVM_DEBUG (dbgs () << " Sw Lowering Kernel LDS for : "
404- << Func->getName ().str ());
457+ LLVM_DEBUG (dbgs () << " Sw Lowering Kernel LDS for : " << Func->getName ());
405458 auto &LDSParams = KernelToLDSParametersMap[Func];
406459 auto &Ctx = M.getContext ();
407460 auto *PrevEntryBlock = &Func->getEntryBlock ();
@@ -423,12 +476,6 @@ void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
423476 auto *const XYZOr = IRB.CreateOr (XYOr, WIdz);
424477 auto *const WIdzCond = IRB.CreateICmpEQ (XYZOr, IRB.getInt32 (0 ));
425478
426- GlobalVariable *SwLDS = LDSParams.SwLDS ;
427- GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata ;
428- assert (SwLDS && SwLDSMetadata);
429- StructType *MetadataStructType =
430- cast<StructType>(SwLDSMetadata->getValueType ());
431-
432479 // All work items will branch to PrevEntryBlock except {0,0,0} index
433480 // work item which will branch to malloc block.
434481 IRB.CreateCondBr (WIdzCond, MallocBlock, PrevEntryBlock);
@@ -439,79 +486,56 @@ void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
439486 // If Dynamic LDS globals are accessed by the kernel,
440487 // Get the size of dyn lds from hidden dyn_lds_size kernel arg.
441488 // Update the corresponding metadata global entries for this dyn lds global.
489+ GlobalVariable *SwLDS = LDSParams.SwLDS ;
490+ GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata ;
491+ assert (SwLDS && SwLDSMetadata);
492+ StructType *MetadataStructType =
493+ cast<StructType>(SwLDSMetadata->getValueType ());
442494 uint32_t MallocSize = 0 ;
443495 Value *CurrMallocSize;
496+ Type *Int32Ty = IRB.getInt32Ty ();
497+ Type *Int64Ty = IRB.getInt64Ty ();
444498
445499 unsigned NumStaticLDS = LDSParams.DirectAccess .StaticLDSGlobals .size () +
446500 LDSParams.IndirectAccess .StaticLDSGlobals .size ();
447501 unsigned NumDynLDS = LDSParams.DirectAccess .DynamicLDSGlobals .size () +
448502 LDSParams.IndirectAccess .DynamicLDSGlobals .size ();
449503
450504 if (NumStaticLDS) {
451- auto *GEPForEndStaticLDSOffset = IRB.CreateInBoundsGEP (
452- MetadataStructType, SwLDSMetadata,
453- {IRB.getInt32 (0 ), IRB.getInt32 (NumStaticLDS - 1 ), IRB.getInt32 (0 )});
454-
455- auto *GEPForEndStaticLDSSize = IRB.CreateInBoundsGEP (
456- MetadataStructType, SwLDSMetadata,
457- {IRB.getInt32 (0 ), IRB.getInt32 (NumStaticLDS - 1 ), IRB.getInt32 (2 )});
505+ auto *GEPForEndStaticLDSOffset =
506+ IRB.CreateInBoundsGEP (MetadataStructType, SwLDSMetadata,
507+ {ConstantInt::get (Int32Ty, 0 ),
508+ ConstantInt::get (Int32Ty, NumStaticLDS - 1 ),
509+ ConstantInt::get (Int32Ty, 0 )});
510+
511+ auto *GEPForEndStaticLDSSize =
512+ IRB.CreateInBoundsGEP (MetadataStructType, SwLDSMetadata,
513+ {ConstantInt::get (Int32Ty, 0 ),
514+ ConstantInt::get (Int32Ty, NumStaticLDS - 1 ),
515+ ConstantInt::get (Int32Ty, 2 )});
458516
459517 Value *EndStaticLDSOffset =
460- IRB.CreateLoad (IRB.getInt64Ty (), GEPForEndStaticLDSOffset);
461- Value *EndStaticLDSSize =
462- IRB.CreateLoad (IRB.getInt64Ty (), GEPForEndStaticLDSSize);
518+ IRB.CreateLoad (Int32Ty, GEPForEndStaticLDSOffset);
519+ Value *EndStaticLDSSize = IRB.CreateLoad (Int32Ty, GEPForEndStaticLDSSize);
463520 CurrMallocSize = IRB.CreateAdd (EndStaticLDSOffset, EndStaticLDSSize);
464521 } else
465- CurrMallocSize = IRB.getInt64 (MallocSize);
522+ CurrMallocSize = IRB.getInt32 (MallocSize);
466523
467524 if (NumDynLDS) {
468- unsigned MaxAlignment = SwLDS->getAlignment ();
469- Value *MaxAlignValue = IRB.getInt64 (MaxAlignment);
470- Value *MaxAlignValueMinusOne = IRB.getInt64 (MaxAlignment - 1 );
471-
525+ // Get size from hidden dyn_lds_size argument of kernel
472526 Value *ImplicitArg =
473527 IRB.CreateIntrinsic (Intrinsic::amdgcn_implicitarg_ptr, {}, {});
474528 Value *HiddenDynLDSSize = IRB.CreateInBoundsGEP (
475- ImplicitArg->getType (), ImplicitArg, {IRB.getInt32 (15 )});
476-
477- auto MallocSizeCalcLambda =
478- [&](SetVector<GlobalVariable *> &DynamicLDSGlobals) {
479- for (GlobalVariable *DynGV : DynamicLDSGlobals) {
480- auto &Indices = LDSParams.LDSToReplacementIndicesMap [DynGV];
481-
482- // Update the Offset metadata.
483- auto *GEPForOffset = IRB.CreateInBoundsGEP (
484- MetadataStructType, SwLDSMetadata,
485- {IRB.getInt32 (0 ), IRB.getInt32 (Indices[1 ]), IRB.getInt32 (0 )});
486- IRB.CreateStore (CurrMallocSize, GEPForOffset);
487-
488- // Get size from hidden dyn_lds_size argument of kernel
489- // Update the size and Aligned Size metadata.
490- auto *GEPForSize = IRB.CreateInBoundsGEP (
491- MetadataStructType, SwLDSMetadata,
492- {IRB.getInt32 (0 ), IRB.getInt32 (Indices[1 ]), IRB.getInt32 (1 )});
493- Value *CurrDynLDSSize =
494- IRB.CreateLoad (IRB.getInt64Ty (), HiddenDynLDSSize);
495- IRB.CreateStore (CurrDynLDSSize, GEPForSize);
496-
497- auto *GEPForAlignedSize = IRB.CreateInBoundsGEP (
498- MetadataStructType, SwLDSMetadata,
499- {IRB.getInt32 (0 ), IRB.getInt32 (Indices[1 ]), IRB.getInt32 (2 )});
500- Value *AlignedDynLDSSize =
501- IRB.CreateAdd (CurrDynLDSSize, MaxAlignValueMinusOne);
502- AlignedDynLDSSize =
503- IRB.CreateUDiv (AlignedDynLDSSize, MaxAlignValue);
504- AlignedDynLDSSize = IRB.CreateMul (AlignedDynLDSSize, MaxAlignValue);
505- IRB.CreateStore (AlignedDynLDSSize, GEPForAlignedSize);
506-
507- // Update the Current Malloc Size
508- CurrMallocSize = IRB.CreateAdd (CurrMallocSize, AlignedDynLDSSize);
509- }
510- };
511- MallocSizeCalcLambda (LDSParams.DirectAccess .DynamicLDSGlobals );
512- MallocSizeCalcLambda (LDSParams.IndirectAccess .DynamicLDSGlobals );
529+ ImplicitArg->getType (), ImplicitArg,
530+ {ConstantInt::get (Int64Ty, COV5_HIDDEN_DYN_LDS_SIZE_ARG)});
531+ updateMallocSizeForDynamicLDS (Func, CurrMallocSize, HiddenDynLDSSize,
532+ LDSParams.DirectAccess .DynamicLDSGlobals );
533+ updateMallocSizeForDynamicLDS (Func, CurrMallocSize, HiddenDynLDSSize,
534+ LDSParams.IndirectAccess .DynamicLDSGlobals );
513535 }
514536
537+ CurrMallocSize = IRB.CreateZExt (CurrMallocSize, Int64Ty);
538+
515539 // Create a call to malloc function which does device global memory allocation
516540 // with size equals to all LDS global accesses size in this kernel.
517541 FunctionCallee AMDGPUMallocFunc = M.getOrInsertFunction (
@@ -679,7 +703,7 @@ void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses(
679703 // Replace LDS access in non-kernel with replacement queried from
680704 // Base table and offset from offset table.
681705 LLVM_DEBUG (dbgs () << " Sw LDS lowering, lower non-kernel access for : "
682- << Func->getName (). str () );
706+ << Func->getName ());
683707 auto *EntryBlock = &Func->getEntryBlock ();
684708 IRB.SetInsertPoint (EntryBlock, EntryBlock->begin ());
685709 Function *Decl =
@@ -752,6 +776,8 @@ bool AMDGPUSwLowerLDS::run() {
752776 for (auto &K : LDSAccesses) {
753777 Function *F = K.first ;
754778 assert (isKernelLDS (F));
779+ if (!F->hasFnAttribute (Attribute::SanitizeAddress))
780+ continue ;
755781
756782 if (!KernelToLDSParametersMap.contains (F)) {
757783 KernelLDSParameters KernelLDSParams;
@@ -816,6 +842,8 @@ bool AMDGPUSwLowerLDS::run() {
816842 buildNonKernelLDSOffsetTable (NKLDSParams);
817843 for (auto &K : NonKernelToLDSAccessMap) {
818844 Function *Func = K.first ;
845+ if (!Func->hasFnAttribute (Attribute::SanitizeAddress))
846+ continue ;
819847 DenseSet<GlobalVariable *> &LDSGlobals = K.second ;
820848 SetVector<GlobalVariable *> OrderedLDSGlobals = sortByName (
821849 std::vector<GlobalVariable *>(LDSGlobals.begin (), LDSGlobals.end ()));
0 commit comments