@@ -117,6 +117,18 @@ static cl::opt<bool>
117117 HoistCommon (" simplifycfg-hoist-common" , cl::Hidden, cl::init(true ),
118118 cl::desc(" Hoist common instructions up to the parent block" ));
119119
120+ static cl::opt<bool > HoistLoadsStoresWithCondFaulting (
121+ " simplifycfg-hoist-loads-stores-with-cond-faulting" , cl::Hidden,
122+ cl::init (true ),
123+ cl::desc(" Hoist loads/stores if the target supports "
124+ " conditional faulting" ));
125+
126+ static cl::opt<unsigned > HoistLoadsStoresWithCondFaultingThreshold (
127+ " hoist-loads-stores-with-cond-faulting-threshold" , cl::Hidden, cl::init(6 ),
128+ cl::desc(" Control the maximal conditional load/store that we are willing "
129+ " to speculatively execute to eliminate conditional branch "
130+ " (default = 6)" ));
131+
120132static cl::opt<unsigned >
121133 HoistCommonSkipLimit (" simplifycfg-hoist-common-skip-limit" , cl::Hidden,
122134 cl::init (20 ),
@@ -2986,6 +2998,25 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
29862998 return BIEndProb < Likely;
29872999}
29883000
3001+ static bool isSafeCheapLoadStore (const Instruction *I,
3002+ const TargetTransformInfo &TTI) {
3003+ // Accept only simple loads and stores; volatile or atomic accesses are not handled.
3004+ if (auto *L = dyn_cast<LoadInst>(I)) {
3005+ if (!L->isSimple ())
3006+ return false ;
3007+ } else if (auto *S = dyn_cast<StoreInst>(I)) {
3008+ if (!S->isSimple ())
3009+ return false ;
3010+ } else
3011+ return false ; // Neither a load nor a store.
3012+
3013+ // llvm.masked.load/store take the alignment as an i32, while load/store use i64.
3014+ // That is why the alignment must be strictly below Value::MaximumAlignment.
3015+ // FIXME: Update the prototype of the intrinsics?
3016+ return TTI.hasConditionalLoadStoreForType (getLoadStoreType (I)) &&
3017+ getLoadStoreAlignment (I) < Value::MaximumAlignment;
3018+ }
3019+
29893020// / Speculate a conditional basic block flattening the CFG.
29903021// /
29913022// / Note that this is a very risky transform currently. Speculating
@@ -3060,6 +3091,9 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
30603091 SmallVector<Instruction *, 4 > SpeculatedDbgIntrinsics;
30613092
30623093 unsigned SpeculatedInstructions = 0 ;
3094+ bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
3095+ Options.HoistLoadsStoresWithCondFaulting ;
3096+ SmallVector<Instruction *, 2 > SpeculatedConditionalLoadsStores;
30633097 Value *SpeculatedStoreValue = nullptr ;
30643098 StoreInst *SpeculatedStore = nullptr ;
30653099 EphemeralValueTracker EphTracker;
@@ -3088,22 +3122,33 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
30883122
30893123 // Only speculatively execute a single instruction (not counting the
30903124 // terminator) for now.
3091- ++SpeculatedInstructions;
3125+ bool IsSafeCheapLoadStore = HoistLoadsStores &&
3126+ isSafeCheapLoadStore (&I, TTI) &&
3127+ SpeculatedConditionalLoadsStores.size () <
3128+ HoistLoadsStoresWithCondFaultingThreshold;
3129+ // Do not count the load/store towards the cost if the target supports
3130+ // conditional faulting, because it is cheap to speculate.
3131+ if (IsSafeCheapLoadStore)
3132+ SpeculatedConditionalLoadsStores.push_back (&I);
3133+ else
3134+ ++SpeculatedInstructions;
3135+
30923136 if (SpeculatedInstructions > 1 )
30933137 return false ;
30943138
30953139 // Don't hoist the instruction if it's unsafe or expensive.
3096- if (!isSafeToSpeculativelyExecute (&I) &&
3097- !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore (
3098- &I, BB, ThenBB, EndBB))))
3140+ if (!IsSafeCheapLoadStore && !isSafeToSpeculativelyExecute (&I) &&
3141+ !(HoistCondStores && !SpeculatedStoreValue &&
3142+ (SpeculatedStoreValue =
3143+ isSafeToSpeculateStore (&I, BB, ThenBB, EndBB))))
30993144 return false ;
3100- if (!SpeculatedStoreValue &&
3145+ if (!IsSafeCheapLoadStore && ! SpeculatedStoreValue &&
31013146 computeSpeculationCost (&I, TTI) >
31023147 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
31033148 return false ;
31043149
31053150 // Store the store speculation candidate.
3106- if (SpeculatedStoreValue)
3151+ if (!SpeculatedStore && SpeculatedStoreValue)
31073152 SpeculatedStore = cast<StoreInst>(&I);
31083153
31093154 // Do not hoist the instruction if any of its operands are defined but not
@@ -3130,11 +3175,11 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
31303175
31313176 // Check that we can insert the selects and that it's not too expensive to do
31323177 // so.
3133- bool Convert = SpeculatedStore != nullptr ;
3178+ bool Convert =
3179+ SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty ();
31343180 InstructionCost Cost = 0 ;
31353181 Convert |= validateAndCostRequiredSelects (BB, ThenBB, EndBB,
3136- SpeculatedInstructions,
3137- Cost, TTI);
3182+ SpeculatedInstructions, Cost, TTI);
31383183 if (!Convert || Cost > Budget)
31393184 return false ;
31403185
@@ -3222,6 +3267,107 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
32223267 BB->splice (BI->getIterator (), ThenBB, ThenBB->begin (),
32233268 std::prev (ThenBB->end ()));
32243269
3270+ // If the target supports conditional faulting,
3271+ // we look for the following pattern:
3272+ // \code
3273+ // BB:
3274+ // ...
3275+ // %cond = icmp ult %x, %y
3276+ // br i1 %cond, label %TrueBB, label %FalseBB
3277+ // FalseBB:
3278+ // store i32 1, ptr %q, align 4
3279+ // ...
3280+ // TrueBB:
3281+ // %maskedloadstore = load i32, ptr %b, align 4
3282+ // store i32 %maskedloadstore, ptr %p, align 4
3283+ // ...
3284+ // \endcode
3285+ //
3286+ // and transform it into:
3287+ //
3288+ // \code
3289+ // BB:
3290+ // ...
3291+ // %cond = icmp ult %x, %y
3292+ // %maskedloadstore = cload i32, ptr %b, %cond
3293+ // cstore i32 %maskedloadstore, ptr %p, %cond
3294+ // cstore i32 1, ptr %q, ~%cond
3295+ // br i1 %cond, label %TrueBB, label %FalseBB
3296+ // FalseBB:
3297+ // ...
3298+ // TrueBB:
3299+ // ...
3300+ // \endcode
3301+ //
3302+ // where cload/cstore are represented by llvm.masked.load/store intrinsics,
3303+ // e.g.
3304+ //
3305+ // \code
3306+ // %vcond = bitcast i1 %cond to <1 x i1>
3307+ // %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
3308+ // (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
3309+ // %maskedloadstore = bitcast <1 x i32> %v0 to i32
3310+ // call void @llvm.masked.store.v1i32.p0
3311+ // (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
3312+ // %cond.not = xor i1 %cond, true
3313+ // %vcond.not = bitcast i1 %cond.not to <1 x i1>
3314+ // call void @llvm.masked.store.v1i32.p0
3315+ // (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
3316+ // \endcode
3317+ //
3318+ // So we need to turn hoisted load/store into cload/cstore.
3319+ auto &Context = BI->getParent ()->getContext ();
3320+ auto *VCondTy = FixedVectorType::get (Type::getInt1Ty (Context), 1 );
3321+ auto *Cond = BI->getOperand (0 );
3322+ Value *Mask = nullptr ;
3323+ // Construct the condition if needed.
3324+ if (!SpeculatedConditionalLoadsStores.empty ()) {
3325+ IRBuilder<> Builder (SpeculatedConditionalLoadsStores.back ());
3326+ Mask = Builder.CreateBitCast (
3327+ Invert ? Builder.CreateXor (Cond, ConstantInt::getTrue (Context)) : Cond,
3328+ VCondTy);
3329+ }
3330+ for (auto *I : SpeculatedConditionalLoadsStores) {
3331+ IRBuilder<> Builder (I);
3332+ // We currently assume conditional faulting load/store is supported for
3333+ // scalar types only when creating new instructions. This can be easily
3334+ // extended for vector types in the future.
3335+ assert (!getLoadStoreType (I)->isVectorTy () && " not implemented" );
3336+ auto *Op0 = I->getOperand (0 );
3337+ Instruction *MaskedLoadStore = nullptr ;
3338+ if (auto *LI = dyn_cast<LoadInst>(I)) {
3339+ // Handle Load.
3340+ auto *Ty = I->getType ();
3341+ MaskedLoadStore = Builder.CreateMaskedLoad (FixedVectorType::get (Ty, 1 ),
3342+ Op0, LI->getAlign (), Mask);
3343+ I->replaceAllUsesWith (Builder.CreateBitCast (MaskedLoadStore, Ty));
3344+ } else {
3345+ // Handle Store.
3346+ auto *StoredVal =
3347+ Builder.CreateBitCast (Op0, FixedVectorType::get (Op0->getType (), 1 ));
3348+ MaskedLoadStore = Builder.CreateMaskedStore (
3349+ StoredVal, I->getOperand (1 ), cast<StoreInst>(I)->getAlign (), Mask);
3350+ }
3351+ // For non-debug metadata, only !annotation, !range, !nonnull and !align are
3352+ // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
3353+ //
3354+ // !nonnull, !align: Pointer types are not supported, so no need to keep them.
3355+ // !range: Load type is changed from scalar to vector, but the metadata on
3356+ // vector specifies a per-element range, so the semantics stay the
3357+ // same. Keep it.
3358+ // !annotation: Does not impact semantics. Keep it.
3359+ I->dropUBImplyingAttrsAndUnknownMetadata (
3360+ {LLVMContext::MD_range, LLVMContext::MD_annotation});
3361+ // FIXME: DIAssignID is not supported for masked store yet.
3362+ // (Verifier::visitDIAssignIDMetadata)
3363+ at::deleteAssignmentMarkers (I);
3364+ I->eraseMetadataIf ([](unsigned MDKind, MDNode *Node) {
3365+ return Node->getMetadataID () == Metadata::DIAssignIDKind;
3366+ });
3367+ MaskedLoadStore->copyMetadata (*I);
3368+ I->eraseFromParent ();
3369+ }
3370+
32253371 // Insert selects and rewrite the PHI operands.
32263372 IRBuilder<NoFolder> Builder (BI);
32273373 for (PHINode &PN : EndBB->phis ()) {
0 commit comments