Skip to content

Conversation

@linuxrocks123
Copy link
Contributor

Still needs tests and testing. Also needs camel case. If the snake case bothers anyone, I'll fix it now.

@github-actions
Copy link

github-actions bot commented Sep 18, 2025

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/lib/Target/AMDGPU/SICustomBranchBundles.h llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp llvm/lib/Target/AMDGPU/SILowerControlFlow.h --diff_from_common_commit

⚠️
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing origin/main to the base branch/commit you want to compare against.
⚠️

View the diff from clang-format here.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 5287ea30a..cc167c3f4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1574,7 +1574,7 @@ void GCNPassConfig::addFastRegAlloc() { // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. - //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); + // insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID); @@ -1602,12 +1602,12 @@ void GCNPassConfig::addOptimizedRegAlloc() { // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. - //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); + // insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); if (EnableRewritePartialRegUses) insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID); -  - insertPass(&RenameIndependentSubregsID,&SIRestoreNormalEpilogLegacyID); + + insertPass(&RenameIndependentSubregsID, &SIRestoreNormalEpilogLegacyID); if (isPassEnabled(EnablePreRAOptimizations)) insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID); @@ -2264,7 +2264,7 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc( // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. 
- //insertPass<PHIEliminationPass>(SILowerControlFlowPass()); + // insertPass<PHIEliminationPass>(SILowerControlFlowPass()); if (EnableRewritePartialRegUses) insertPass<RenameIndependentSubregsPass>(GCNRewritePartialRegUsesPass()); diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 64d46c5e4..c067ceb94 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -48,12 +48,12 @@ /// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits //===----------------------------------------------------------------------===// -#include "SICustomBranchBundles.h" #include "SILowerControlFlow.h" #include "AMDGPU.h" #include "AMDGPULaneMaskUtils.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SICustomBranchBundles.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" @@ -160,7 +160,7 @@ public: MachineFunctionProperties getClearedProperties() const override { return MachineFunctionProperties().setNoPHIs(); } -  + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addUsedIfAvailable<LiveIntervalsWrapperPass>(); // Should preserve the same set that TwoAddressInstructions does. 
diff --git a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp index d6d02c940..09567d424 100644 --- a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp +++ b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp @@ -1,7 +1,7 @@ -#include "SICustomBranchBundles.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SICustomBranchBundles.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" @@ -12,8 +12,7 @@ #define DEBUG_TYPE "si-restore-normal-epilog" -namespace -{ +namespace { class SIRestoreNormalEpilogLegacy : public MachineFunctionPass { public: @@ -34,7 +33,6 @@ public: MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().setNoPHIs(); } - }; } // namespace 

bool phi_seen = false;
MachineBasicBlock::iterator first_phi;
for (first_phi = MBB.begin(); first_phi != MBB.end(); first_phi++)
Copy link
Contributor

@alex-t alex-t Sep 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bool phi_seen = !MBB.phis().empty() ?
or even:
if (!MBB.phis().empty()) first_phi = MBB.phis().begin();

break;
}

if (!phi_seen) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessary, as SILowerControlFlow always inserts at block begin.

MachineBasicBlock::iterator Start = MBB.begin();
Register SaveReg = MRI->createVirtualRegister(BoolRC);
MachineInstr *OrSaveExec =
BuildMI(MBB, Start, DL, TII->get(LMC.OrSaveExecOpc), SaveReg)
.add(MI.getOperand(1)); // Saved EXEC

move_ins_before_phis(*OrSaveExec);

if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB)
return ++Epilog_Iterator(branch_MI.getIterator());

llvm_unreachable("There should always be a branch to succ_MBB.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about fall-through?
bb.1: successors: %bb.2(0x80000000)

%64:vgpr_32 = V_MOV_B32_e32 100, implicit $exec 

bb.2: successors: %bb.5(0x40000000), %bb.3(0x40000000)

Copy link
Contributor Author

@linuxrocks123 linuxrocks123 Sep 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looked like the IR wasn't allowed to have fall-throughs at this stage, because I didn't see any and I did see unconditional-branch-to-next. Are fall-throughs instead allowed and just not common at this stage?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fall-throughs are not prohibited and I have seen enough valid MIR in SSA with fall-through CF.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, okay, I can normalize it by adding unconditional branches to next.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

@@ -323,6 +332,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);

move_ins_before_phis(*OrSaveExec);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EXEC restoring instructions happen not only in ELSE

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but the other ones are at the end of the blocks, not before the PHIs, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we lower SI_END_CF and don't split block we are going to have Exec = Exec OR Masked in the beginning or middle.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I'll take a look.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alex-t, I am not seeing the problem. The surrounding code makes clear that the insertion can only ever happen at block begin. I did not change those lines. Are you sure there is an issue here?

 MachineBasicBlock::iterator Start = MBB.begin(); // This must be inserted before phis and any spill code inserted before the // else. Register SaveReg = MRI->createVirtualRegister(BoolRC); MachineInstr *OrSaveExec = BuildMI(MBB, Start, DL, TII->get(LMC.OrSaveExecOpc), SaveReg) .add(MI.getOperand(1)); // Saved EXEC if (LV) LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec); moveInsBeforePhis(*OrSaveExec); 
MachineInstr *cloned_MI = MF.CloneMachineInstr(&MI);
cloned_MI->getOperand(0).setReg(cloned_reg);
phi.addReg(cloned_reg).addMBB(pred_MBB);
pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to make sure that there is no Machine Verifier run between SILowerControlFlow and SIRestoreNormalEpilog.
Also, check that the unusual placement of instructions after the branch does not break the SILowerControlFlow logic itself.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There must not be a machine verifier or the testcase would have broken, I think.

@llvmbot
Copy link
Member

llvmbot commented Nov 11, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Patrick Simmons (linuxrocks123)

Changes

Still needs tests and testing. Also needs camel case. If the snake case bothers anyone, I'll fix it now.


Full diff: https://github.com/llvm/llvm-project/pull/159557.diff

7 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+3)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+8-3)
  • (modified) llvm/lib/Target/AMDGPU/CMakeLists.txt (+1)
  • (added) llvm/lib/Target/AMDGPU/SICustomBranchBundles.h (+261)
  • (modified) llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp (+18-1)
  • (modified) llvm/lib/Target/AMDGPU/SILowerControlFlow.h (+8)
  • (added) llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp (+46)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 67042b700c047..bff430f0f1967 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -248,6 +248,9 @@ extern char &AMDGPUPreloadKernArgPrologLegacyID; void initializeAMDGPUPreloadKernelArgumentsLegacyPass(PassRegistry &); extern char &AMDGPUPreloadKernelArgumentsLegacyID; +void initializeSIRestoreNormalEpilogLegacyPass(PassRegistry &); +extern char &SIRestoreNormalEpilogLegacyID; + // Passes common to R600 and SI FunctionPass *createAMDGPUPromoteAlloca(); void initializeAMDGPUPromoteAllocaPass(PassRegistry&); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index b87b54ffc4f12..5287ea30a1c1b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -603,6 +603,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIOptimizeExecMaskingLegacyPass(*PR); initializeSIPreAllocateWWMRegsLegacyPass(*PR); initializeSIFormMemoryClausesLegacyPass(*PR); + initializeSIRestoreNormalEpilogLegacyPass(*PR); initializeSIPostRABundlerLegacyPass(*PR); initializeGCNCreateVOPDLegacyPass(*PR); initializeAMDGPUUnifyDivergentExitNodesPass(*PR); @@ -1573,7 +1574,7 @@ void GCNPassConfig::addFastRegAlloc() { // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. 
- insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); + //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID); @@ -1596,13 +1597,17 @@ void GCNPassConfig::addOptimizedRegAlloc() { if (OptVGPRLiveRange) insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeLegacyID); + insertPass(&SIOptimizeVGPRLiveRangeLegacyID, &SILowerControlFlowLegacyID); + // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. - insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); + //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); if (EnableRewritePartialRegUses) insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID); +  + insertPass(&RenameIndependentSubregsID,&SIRestoreNormalEpilogLegacyID); if (isPassEnabled(EnablePreRAOptimizations)) insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID); @@ -2259,7 +2264,7 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc( // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. 
- insertPass<PHIEliminationPass>(SILowerControlFlowPass()); + //insertPass<PHIEliminationPass>(SILowerControlFlowPass()); if (EnableRewritePartialRegUses) insertPass<RenameIndependentSubregsPass>(GCNRewritePartialRegUsesPass()); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index a1e0e5293c706..d035f7aea3298 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -185,6 +185,7 @@ add_llvm_target(AMDGPUCodeGen SIPreEmitPeephole.cpp SIProgramInfo.cpp SIRegisterInfo.cpp + SIRestoreNormalEpilog.cpp SIShrinkInstructions.cpp SIWholeQuadMode.cpp diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h new file mode 100644 index 0000000000000..03ee6a251f391 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h @@ -0,0 +1,261 @@ +#pragma once + +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/ErrorHandling.h" + +#include "SIInstrInfo.h" + +#include <cassert> +#include <unordered_set> + +using namespace llvm; + +using std::unordered_set; +using std::vector; + +static inline MachineInstr &getBranchWithDest(MachineBasicBlock &BranchingMBB, + MachineBasicBlock &DestMBB) { + auto &TII = + *BranchingMBB.getParent()->getSubtarget<GCNSubtarget>().getInstrInfo(); + for (MachineInstr &BranchMI : reverse(BranchingMBB.instrs())) + if (BranchMI.isBranch() && TII.getBranchDestBlock(BranchMI) == &DestMBB) + return BranchMI; + + llvm_unreachable("Don't call this if there's no branch to the destination."); +} + +static inline void moveInsBeforePhis(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + + bool PhiSeen = false; + 
MachineBasicBlock::iterator FirstPhi; + for (FirstPhi = MBB.begin(); FirstPhi != MBB.end(); FirstPhi++) + if (FirstPhi->getOpcode() == AMDGPU::PHI) { + PhiSeen = true; + break; + } + + if (!PhiSeen) { + MI.removeFromParent(); + MBB.insert(MBB.begin(), &MI); + } else { + auto Phi = BuildMI(MBB, FirstPhi, MI.getDebugLoc(), TII.get(AMDGPU::PHI), + MI.getOperand(0).getReg()); + for (auto *PredMBB : MBB.predecessors()) { + Register ClonedReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); + MachineInstr &BranchMI = getBranchWithDest(*PredMBB, MBB); + MachineInstr *ClonedMI = MF.CloneMachineInstr(&MI); + ClonedMI->getOperand(0).setReg(ClonedReg); + Phi.addReg(ClonedReg).addMBB(PredMBB); + PredMBB->insertAfterBundle(BranchMI.getIterator(), ClonedMI); + ClonedMI->bundleWithPred(); + } + MI.eraseFromParent(); + } +} + +struct EpilogIterator { + MachineBasicBlock::instr_iterator InternalIt; + EpilogIterator(MachineBasicBlock::instr_iterator I) : InternalIt(I) {} + + bool operator==(const EpilogIterator &Other) { + return InternalIt == Other.InternalIt; + } + bool isEnd() { return InternalIt.isEnd(); } + MachineInstr &operator*() { return *InternalIt; } + MachineBasicBlock::instr_iterator operator->() { return InternalIt; } + EpilogIterator &operator++() { + ++InternalIt; + if (!InternalIt.isEnd() && InternalIt->isBranch()) + InternalIt = InternalIt->getParent()->instr_end(); + return *this; + } + EpilogIterator operator++(int Ignored) { + EpilogIterator ToReturn = *this; + ++*this; + return ToReturn; + } +}; + +static inline EpilogIterator getEpilogForSuccessor(MachineBasicBlock &PredMBB, + MachineBasicBlock &SuccMBB) { + MachineFunction &MF = *PredMBB.getParent(); + auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo(); + + for (MachineInstr &BranchMI : reverse(PredMBB.instrs())) + if (BranchMI.isBranch() && TII.getBranchDestBlock(BranchMI) == &SuccMBB) + return ++EpilogIterator(BranchMI.getIterator()); + + llvm_unreachable("There should always be a branch to 
succ_MBB."); +} + +static inline bool epilogsAreIdentical(const vector<MachineInstr *> Left, + const vector<MachineInstr *> Right, + const MachineBasicBlock &SuccMBB) { + if (Left.size() != Right.size()) + return false; + + for (unsigned I = 0; I < Left.size(); I++) + if (!Left[I]->isIdenticalTo(*Right[I])) + return false; + return true; +} + +static inline void moveBody(vector<MachineInstr *> &Body, + MachineBasicBlock &DestMBB) { + for (auto RevIt = Body.rbegin(); RevIt != Body.rend(); RevIt++) { + MachineInstr &BodyIns = **RevIt; + BodyIns.removeFromBundle(); + DestMBB.insert(DestMBB.begin(), &BodyIns); + } +} + +static inline void normalizeIrPostPhiElimination(MachineFunction &MF) { + auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo(); + + struct CFGRewriteEntry { + unordered_set<MachineBasicBlock *> PredMBBs; + MachineBasicBlock *SuccMBB; + vector<MachineInstr *> Body; + }; + + vector<CFGRewriteEntry> CfgRewriteEntries; + for (MachineBasicBlock &MBB : MF) { + CFGRewriteEntry ToInsert = {{}, &MBB, {}}; + for (MachineBasicBlock *PredMBB : MBB.predecessors()) { + EpilogIterator EpIt = getEpilogForSuccessor(*PredMBB, MBB); + + vector<MachineInstr *> Epilog; + while (!EpIt.isEnd()) + Epilog.push_back(&*EpIt++); + + if (!epilogsAreIdentical(ToInsert.Body, Epilog, MBB)) { + if (ToInsert.PredMBBs.size() && ToInsert.Body.size()) { + // Potentially, we need to insert a new entry. But first see if we + // can find an existing entry with the same epilog. + bool ExistingEntryFound = false; + for (auto RevIt = CfgRewriteEntries.rbegin(); + RevIt != CfgRewriteEntries.rend() && RevIt->SuccMBB == &MBB; + RevIt++) + if (epilogsAreIdentical(RevIt->Body, Epilog, MBB)) { + RevIt->PredMBBs.insert(PredMBB); + ExistingEntryFound = true; + break; + } + + if (!ExistingEntryFound) + CfgRewriteEntries.push_back(ToInsert); + } + ToInsert.PredMBBs.clear(); + ToInsert.Body = Epilog; + } + + ToInsert.PredMBBs.insert(PredMBB); + } + + // Handle the last potential rewrite entry. 
Lower instead of journaling a + // rewrite entry if all predecessor MBBs are in this single entry. + if (ToInsert.PredMBBs.size() == MBB.pred_size()) { + moveBody(ToInsert.Body, MBB); + for (MachineBasicBlock *PredMBB : ToInsert.PredMBBs) { + // Delete instructions that were lowered from epilog + MachineInstr &BranchIns = + getBranchWithDest(*PredMBB, *ToInsert.SuccMBB); + auto EpilogIt = ++EpilogIterator(BranchIns.getIterator()); + while (!EpilogIt.isEnd()) + EpilogIt++->eraseFromBundle(); + } + + } else if (ToInsert.Body.size()) + CfgRewriteEntries.push_back(ToInsert); + } + + // Perform the journaled rewrites. + for (auto &Entry : CfgRewriteEntries) { + MachineBasicBlock *MezzanineMBB = MF.CreateMachineBasicBlock(); + MF.insert(MF.end(), MezzanineMBB); + + // Deal with mezzanine to successor succession. + BuildMI(MezzanineMBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)) + .addMBB(Entry.SuccMBB); + MezzanineMBB->addSuccessor(Entry.SuccMBB); + + // Move instructions to mezzanine block. + moveBody(Entry.Body, *MezzanineMBB); + + for (MachineBasicBlock *PredMBB : Entry.PredMBBs) { + // Deal with predecessor to mezzanine succession. 
+ MachineInstr &BranchIns = getBranchWithDest(*PredMBB, *Entry.SuccMBB); + assert(BranchIns.getOperand(0).isMBB() && "Branch instruction isn't."); + BranchIns.getOperand(0).setMBB(MezzanineMBB); + PredMBB->replaceSuccessor(Entry.SuccMBB, MezzanineMBB); + + // Delete instructions that were lowered from epilog + auto EpilogIt = ++EpilogIterator(BranchIns.getIterator()); + while (!EpilogIt.isEnd()) + EpilogIt++->eraseFromBundle(); + } + } +} + +namespace std { +template <> struct hash<Register> { + std::size_t operator()(const Register &R) const { + return hash<unsigned>()(R); + } +}; +} // namespace std + +static inline void hoistUnrelatedCopies(MachineFunction &MF) { + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &BranchMI : MBB) { + if (!BranchMI.isBranch()) + continue; + + unordered_set<Register> RelatedCopySources; + EpilogIterator EpilogIt = BranchMI.getIterator(); + EpilogIterator CopyMoveIt = ++EpilogIt; + while (!EpilogIt.isEnd()) { + if (EpilogIt->getOpcode() != AMDGPU::COPY) + RelatedCopySources.insert(EpilogIt->getOperand(0).getReg()); + ++EpilogIt; + } + + while (!CopyMoveIt.isEnd()) { + EpilogIterator Next = CopyMoveIt; + ++Next; + if ((CopyMoveIt->getOpcode() == AMDGPU::COPY && + !RelatedCopySources.count(CopyMoveIt->getOperand(1).getReg())) || + CopyMoveIt->getOpcode() == AMDGPU::IMPLICIT_DEF) { + MachineInstr &MIToMove = *CopyMoveIt; + MIToMove.removeFromBundle(); + MBB.insert(BranchMI.getIterator(), &MIToMove); + } + + CopyMoveIt = Next; + } + } +} + +static inline bool makeEverySuccessorBeBranchTarget(MachineFunction &MF) { + bool Changed = false; + auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo(); + for (MachineBasicBlock &MBB : MF) + if (MBB.empty() || + (!MBB.back().isUnconditionalBranch() && !MBB.back().isReturn())) { + MachineBasicBlock *LayoutSuccessor = + &*std::next(MachineFunction::iterator(MBB)); + BuildMI(&MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)) + .addMBB(LayoutSuccessor); + Changed = true; + } + + return Changed; 
+} diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 8586d6c18b361..64d46c5e401de 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -48,6 +48,7 @@ /// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits //===----------------------------------------------------------------------===// +#include "SICustomBranchBundles.h" #include "SILowerControlFlow.h" #include "AMDGPU.h" #include "AMDGPULaneMaskUtils.h" @@ -152,6 +153,14 @@ class SILowerControlFlowLegacy : public MachineFunctionPass { return "SI Lower control flow pseudo instructions"; } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().setIsSSA(); + } + + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().setNoPHIs(); + } +  void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addUsedIfAvailable<LiveIntervalsWrapperPass>(); // Should preserve the same set that TwoAddressInstructions does. 
@@ -322,6 +331,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { if (LV) LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec); + moveInsBeforePhis(*OrSaveExec); + MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); MachineBasicBlock::iterator ElsePt(MI); @@ -789,7 +800,7 @@ bool SILowerControlFlow::run(MachineFunction &MF) { } } - bool Changed = false; + bool Changed = makeEverySuccessorBeBranchTarget(MF); MachineFunction::iterator NextBB; for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); BI = NextBB) { @@ -839,6 +850,12 @@ bool SILowerControlFlow::run(MachineFunction &MF) { LoweredIf.clear(); KillBlocks.clear(); + if (Changed) + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : MBB) + if (MI.isBundled()) + MI.unbundleFromSucc(); + return Changed; } diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h index 23803c679c246..0f4df79952999 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h @@ -16,6 +16,14 @@ class SILowerControlFlowPass : public PassInfoMixin<SILowerControlFlowPass> { public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() const { + return MachineFunctionProperties().setIsSSA(); + } + + MachineFunctionProperties getClearedProperties() const { + return MachineFunctionProperties().setNoPHIs(); + } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp new file mode 100644 index 0000000000000..d6d02c940731c --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp @@ -0,0 +1,46 @@ +#include "SICustomBranchBundles.h" +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveVariables.h" +#include 
"llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "si-restore-normal-epilog" + +namespace +{ + +class SIRestoreNormalEpilogLegacy : public MachineFunctionPass { +public: + static char ID; + + SIRestoreNormalEpilogLegacy() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + hoistUnrelatedCopies(MF); + normalizeIrPostPhiElimination(MF); + return true; + } + + StringRef getPassName() const override { + return "SI Restore Normal Epilog Post PHI Elimination"; + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().setNoPHIs(); + } + +}; + +} // namespace + +INITIALIZE_PASS(SIRestoreNormalEpilogLegacy, DEBUG_TYPE, + "SI restore normal epilog", false, false) + +char SIRestoreNormalEpilogLegacy::ID; +char &llvm::SIRestoreNormalEpilogLegacyID = SIRestoreNormalEpilogLegacy::ID; 
@linuxrocks123
Copy link
Contributor Author

@alex-t I've completed all action items for this, but there are some comments to which I've responded where you have not replied. Please let me know your thoughts.

@jayfoad
Copy link
Contributor

jayfoad commented Nov 12, 2025

This needs a proper description and motivation - what does the patch do and why?

@kzhuravl kzhuravl requested a review from alex-t November 21, 2025 16:09
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

4 participants