Move SI Lower Control Flow Up #159557
base: main
Conversation
You can test this locally with the following command:

git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/lib/Target/AMDGPU/SICustomBranchBundles.h llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp llvm/lib/Target/AMDGPU/SILowerControlFlow.h --diff_from_common_commit
View the diff from clang-format here.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5287ea30a..cc167c3f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1574,7 +1574,7 @@ void GCNPassConfig::addFastRegAlloc() {
   // This must be run immediately after phi elimination and before
   // TwoAddressInstructions, otherwise the processing of the tied operand of
   // SI_ELSE will introduce a copy of the tied operand source after the else.
-  //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
+  // insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
 
   insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
 
@@ -1602,12 +1602,12 @@ void GCNPassConfig::addOptimizedRegAlloc() {
   // This must be run immediately after phi elimination and before
   // TwoAddressInstructions, otherwise the processing of the tied operand of
   // SI_ELSE will introduce a copy of the tied operand source after the else.
-  //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
+  // insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
 
   if (EnableRewritePartialRegUses)
     insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
-
-  insertPass(&RenameIndependentSubregsID,&SIRestoreNormalEpilogLegacyID);
+
+  insertPass(&RenameIndependentSubregsID, &SIRestoreNormalEpilogLegacyID);
 
   if (isPassEnabled(EnablePreRAOptimizations))
     insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID);
 
@@ -2264,7 +2264,7 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
   // This must be run immediately after phi elimination and before
   // TwoAddressInstructions, otherwise the processing of the tied operand of
   // SI_ELSE will introduce a copy of the tied operand source after the else.
-  //insertPass<PHIEliminationPass>(SILowerControlFlowPass());
+  // insertPass<PHIEliminationPass>(SILowerControlFlowPass());
 
   if (EnableRewritePartialRegUses)
     insertPass<RenameIndependentSubregsPass>(GCNRewritePartialRegUsesPass());

diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 64d46c5e4..c067ceb94 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -48,12 +48,12 @@
 /// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits
 //===----------------------------------------------------------------------===//
 
-#include "SICustomBranchBundles.h"
 #include "SILowerControlFlow.h"
 #include "AMDGPU.h"
 #include "AMDGPULaneMaskUtils.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SICustomBranchBundles.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveVariables.h"
@@ -160,7 +160,7 @@ public:
   MachineFunctionProperties getClearedProperties() const override {
     return MachineFunctionProperties().setNoPHIs();
   }
-  
+
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
     // Should preserve the same set that TwoAddressInstructions does.

diff --git a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
index d6d02c940..09567d424 100644
--- a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
@@ -1,7 +1,7 @@
-#include "SICustomBranchBundles.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SICustomBranchBundles.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveVariables.h"
@@ -12,8 +12,7 @@
 
 #define DEBUG_TYPE "si-restore-normal-epilog"
 
-namespace
-{
+namespace {
 
 class SIRestoreNormalEpilogLegacy : public MachineFunctionPass {
 public:
@@ -34,7 +33,6 @@ public:
   MachineFunctionProperties getRequiredProperties() const override {
     return MachineFunctionProperties().setNoPHIs();
   }
-
 };
 
 } // namespace
bool phi_seen = false;
MachineBasicBlock::iterator first_phi;
for (first_phi = MBB.begin(); first_phi != MBB.end(); first_phi++)
bool phi_seen = !MBB.phis().empty() ?
or even:
if (!MBB.phis().empty()) first_phi = MBB.phis().begin();
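For reference, a minimal sketch of that suggestion, assuming the surrounding moveInsBeforePhis context from this patch and keeping the names from the quoted snippet:

// Hypothetical rewrite of the scan above using MachineBasicBlock::phis(),
// which yields the range of leading PHI instructions in the block.
auto phi_range = MBB.phis();
bool phi_seen = !phi_range.empty();
MachineBasicBlock::iterator first_phi =
    phi_seen ? phi_range.begin() : MBB.end();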
    break;
  }

if (!phi_seen) {
Not necessary, as SILowerControlFlow always inserts at block begin:
MachineBasicBlock::iterator Start = MBB.begin();
Register SaveReg = MRI->createVirtualRegister(BoolRC);
MachineInstr *OrSaveExec =
BuildMI(MBB, Start, DL, TII->get(LMC.OrSaveExecOpc), SaveReg)
.add(MI.getOperand(1)); // Saved EXEC
move_ins_before_phis(*OrSaveExec);
if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB)
  return ++Epilog_Iterator(branch_MI.getIterator());

llvm_unreachable("There should always be a branch to succ_MBB.");
What about fall-through? For example:

bb.1: successors: %bb.2(0x80000000)
  %64:vgpr_32 = V_MOV_B32_e32 100, implicit $exec

bb.2: successors: %bb.5(0x40000000), %bb.3(0x40000000)
It looked like the IR wasn't allowed to have fall-throughs at this stage, because I didn't see any and I did see unconditional-branch-to-next. Are fall-throughs instead allowed and just not common at this stage?
Fall-throughs are not prohibited, and I have seen plenty of valid MIR in SSA form with fall-through control flow.
Hmm, okay, I can normalize it by adding unconditional branches to the next block.
Done
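For context, that normalization is what the patch's makeEverySuccessorBeBranchTarget helper now does; roughly (sketch condensed from SICustomBranchBundles.h in this PR):

// Give every block that neither returns nor ends in an unconditional branch
// an explicit S_BRANCH to its layout successor, so later epilog handling can
// always find a branch to the successor block.
for (MachineBasicBlock &MBB : MF)
  if (MBB.empty() ||
      (!MBB.back().isUnconditionalBranch() && !MBB.back().isReturn())) {
    MachineBasicBlock *LayoutSuccessor =
        &*std::next(MachineFunction::iterator(MBB));
    BuildMI(&MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH))
        .addMBB(LayoutSuccessor);
  }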
@@ -323,6 +332,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
   if (LV)
     LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);

   move_ins_before_phis(*OrSaveExec);
EXEC-restoring instructions happen not only in ELSE.
Yes, but the other ones are at the end of the blocks, not before the PHIs, right?
If we lower SI_END_CF and don't split the block, we are going to have Exec = Exec OR Mask at the beginning or in the middle of a block.
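A hypothetical MIR sketch of that case (opcodes and registers made up for illustration): with SI_END_CF lowered in place and no block split, the exec restore lands mid-block rather than at a block boundary:

bb.3:
  %5:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
  $exec = S_OR_B64 $exec, %3:sreg_64, implicit-def dead $scc   ; lowered SI_END_CF, mid-block
  %6:vgpr_32 = V_MOV_B32_e32 2, implicit $exec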
Hmm, I'll take a look.
@alex-t, I am not seeing the problem. The surrounding code makes clear that the insertion can only ever happen at block begin. I did not change those lines. Are you sure there is an issue here?
MachineBasicBlock::iterator Start = MBB.begin();

// This must be inserted before phis and any spill code inserted before the
// else.
Register SaveReg = MRI->createVirtualRegister(BoolRC);
MachineInstr *OrSaveExec =
    BuildMI(MBB, Start, DL, TII->get(LMC.OrSaveExecOpc), SaveReg)
        .add(MI.getOperand(1)); // Saved EXEC

if (LV)
  LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);

moveInsBeforePhis(*OrSaveExec);

MachineInstr *cloned_MI = MF.CloneMachineInstr(&MI);
cloned_MI->getOperand(0).setReg(cloned_reg);
phi.addReg(cloned_reg).addMBB(pred_MBB);
pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI);
You need to make sure that there is no machine verifier run between SILowerControlFlow and SIRestoreNormalEpilog.
Also, check that the unusual placement of instructions after the branch does not break the SILowerControlFlow logic itself.
There must not be a machine verifier there, or the test case would have broken, I think.
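One way to double-check that assumption, rather than relying on the test case alone, is to force the machine verifier on between passes; for example (illustrative command, the input file name is a placeholder):

llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -o /dev/null input.ll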
@llvm/pr-subscribers-backend-amdgpu

Author: Patrick Simmons (linuxrocks123)

Changes

Still needs tests and testing. Also needs camel case. If the snake case bothers anyone, I'll fix it now.

Full diff: https://github.com/llvm/llvm-project/pull/159557.diff

7 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 67042b700c047..bff430f0f1967 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -248,6 +248,9 @@ extern char &AMDGPUPreloadKernArgPrologLegacyID; void initializeAMDGPUPreloadKernelArgumentsLegacyPass(PassRegistry &); extern char &AMDGPUPreloadKernelArgumentsLegacyID; +void initializeSIRestoreNormalEpilogLegacyPass(PassRegistry &); +extern char &SIRestoreNormalEpilogLegacyID; + // Passes common to R600 and SI FunctionPass *createAMDGPUPromoteAlloca(); void initializeAMDGPUPromoteAllocaPass(PassRegistry&); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index b87b54ffc4f12..5287ea30a1c1b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -603,6 +603,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIOptimizeExecMaskingLegacyPass(*PR); initializeSIPreAllocateWWMRegsLegacyPass(*PR); initializeSIFormMemoryClausesLegacyPass(*PR); + initializeSIRestoreNormalEpilogLegacyPass(*PR); initializeSIPostRABundlerLegacyPass(*PR); initializeGCNCreateVOPDLegacyPass(*PR); initializeAMDGPUUnifyDivergentExitNodesPass(*PR); @@ -1573,7 +1574,7 @@ void GCNPassConfig::addFastRegAlloc() { // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. - insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); + //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID); @@ -1596,13 +1597,17 @@ void GCNPassConfig::addOptimizedRegAlloc() { if (OptVGPRLiveRange) insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeLegacyID); + insertPass(&SIOptimizeVGPRLiveRangeLegacyID, &SILowerControlFlowLegacyID); + // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. - insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); + //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); if (EnableRewritePartialRegUses) insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID); + + insertPass(&RenameIndependentSubregsID,&SIRestoreNormalEpilogLegacyID); if (isPassEnabled(EnablePreRAOptimizations)) insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID); @@ -2259,7 +2264,7 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc( // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. 
- insertPass<PHIEliminationPass>(SILowerControlFlowPass()); + //insertPass<PHIEliminationPass>(SILowerControlFlowPass()); if (EnableRewritePartialRegUses) insertPass<RenameIndependentSubregsPass>(GCNRewritePartialRegUsesPass()); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index a1e0e5293c706..d035f7aea3298 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -185,6 +185,7 @@ add_llvm_target(AMDGPUCodeGen SIPreEmitPeephole.cpp SIProgramInfo.cpp SIRegisterInfo.cpp + SIRestoreNormalEpilog.cpp SIShrinkInstructions.cpp SIWholeQuadMode.cpp diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h new file mode 100644 index 0000000000000..03ee6a251f391 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h @@ -0,0 +1,261 @@ +#pragma once + +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/ErrorHandling.h" + +#include "SIInstrInfo.h" + +#include <cassert> +#include <unordered_set> + +using namespace llvm; + +using std::unordered_set; +using std::vector; + +static inline MachineInstr &getBranchWithDest(MachineBasicBlock &BranchingMBB, + MachineBasicBlock &DestMBB) { + auto &TII = + *BranchingMBB.getParent()->getSubtarget<GCNSubtarget>().getInstrInfo(); + for (MachineInstr &BranchMI : reverse(BranchingMBB.instrs())) + if (BranchMI.isBranch() && TII.getBranchDestBlock(BranchMI) == &DestMBB) + return BranchMI; + + llvm_unreachable("Don't call this if there's no branch to the destination."); +} + +static inline void moveInsBeforePhis(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + + bool PhiSeen = false; + MachineBasicBlock::iterator FirstPhi; + for (FirstPhi = MBB.begin(); FirstPhi != MBB.end(); FirstPhi++) + if (FirstPhi->getOpcode() == AMDGPU::PHI) { + PhiSeen = true; + break; + } + + if (!PhiSeen) { + MI.removeFromParent(); + MBB.insert(MBB.begin(), &MI); + } else { + auto Phi = BuildMI(MBB, FirstPhi, MI.getDebugLoc(), TII.get(AMDGPU::PHI), + MI.getOperand(0).getReg()); + for (auto *PredMBB : MBB.predecessors()) { + Register ClonedReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); + MachineInstr &BranchMI = getBranchWithDest(*PredMBB, MBB); + MachineInstr *ClonedMI = MF.CloneMachineInstr(&MI); + ClonedMI->getOperand(0).setReg(ClonedReg); + Phi.addReg(ClonedReg).addMBB(PredMBB); + PredMBB->insertAfterBundle(BranchMI.getIterator(), ClonedMI); + ClonedMI->bundleWithPred(); + } + MI.eraseFromParent(); + } +} + +struct EpilogIterator { + MachineBasicBlock::instr_iterator InternalIt; + EpilogIterator(MachineBasicBlock::instr_iterator I) : InternalIt(I) {} + + bool operator==(const EpilogIterator &Other) { + return InternalIt == Other.InternalIt; + } + bool isEnd() { return InternalIt.isEnd(); } + MachineInstr &operator*() { return *InternalIt; } + MachineBasicBlock::instr_iterator operator->() { return InternalIt; } + EpilogIterator &operator++() { + ++InternalIt; + if (!InternalIt.isEnd() && InternalIt->isBranch()) + InternalIt = InternalIt->getParent()->instr_end(); + return *this; + } + EpilogIterator operator++(int Ignored) { + EpilogIterator ToReturn = *this; + ++*this; + return ToReturn; + } +}; + +static inline 
EpilogIterator getEpilogForSuccessor(MachineBasicBlock &PredMBB, + MachineBasicBlock &SuccMBB) { + MachineFunction &MF = *PredMBB.getParent(); + auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo(); + + for (MachineInstr &BranchMI : reverse(PredMBB.instrs())) + if (BranchMI.isBranch() && TII.getBranchDestBlock(BranchMI) == &SuccMBB) + return ++EpilogIterator(BranchMI.getIterator()); + + llvm_unreachable("There should always be a branch to succ_MBB."); +} + +static inline bool epilogsAreIdentical(const vector<MachineInstr *> Left, + const vector<MachineInstr *> Right, + const MachineBasicBlock &SuccMBB) { + if (Left.size() != Right.size()) + return false; + + for (unsigned I = 0; I < Left.size(); I++) + if (!Left[I]->isIdenticalTo(*Right[I])) + return false; + return true; +} + +static inline void moveBody(vector<MachineInstr *> &Body, + MachineBasicBlock &DestMBB) { + for (auto RevIt = Body.rbegin(); RevIt != Body.rend(); RevIt++) { + MachineInstr &BodyIns = **RevIt; + BodyIns.removeFromBundle(); + DestMBB.insert(DestMBB.begin(), &BodyIns); + } +} + +static inline void normalizeIrPostPhiElimination(MachineFunction &MF) { + auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo(); + + struct CFGRewriteEntry { + unordered_set<MachineBasicBlock *> PredMBBs; + MachineBasicBlock *SuccMBB; + vector<MachineInstr *> Body; + }; + + vector<CFGRewriteEntry> CfgRewriteEntries; + for (MachineBasicBlock &MBB : MF) { + CFGRewriteEntry ToInsert = {{}, &MBB, {}}; + for (MachineBasicBlock *PredMBB : MBB.predecessors()) { + EpilogIterator EpIt = getEpilogForSuccessor(*PredMBB, MBB); + + vector<MachineInstr *> Epilog; + while (!EpIt.isEnd()) + Epilog.push_back(&*EpIt++); + + if (!epilogsAreIdentical(ToInsert.Body, Epilog, MBB)) { + if (ToInsert.PredMBBs.size() && ToInsert.Body.size()) { + // Potentially, we need to insert a new entry. But first see if we + // can find an existing entry with the same epilog. + bool ExistingEntryFound = false; + for (auto RevIt = CfgRewriteEntries.rbegin(); + RevIt != CfgRewriteEntries.rend() && RevIt->SuccMBB == &MBB; + RevIt++) + if (epilogsAreIdentical(RevIt->Body, Epilog, MBB)) { + RevIt->PredMBBs.insert(PredMBB); + ExistingEntryFound = true; + break; + } + + if (!ExistingEntryFound) + CfgRewriteEntries.push_back(ToInsert); + } + ToInsert.PredMBBs.clear(); + ToInsert.Body = Epilog; + } + + ToInsert.PredMBBs.insert(PredMBB); + } + + // Handle the last potential rewrite entry. Lower instead of journaling a + // rewrite entry if all predecessor MBBs are in this single entry. + if (ToInsert.PredMBBs.size() == MBB.pred_size()) { + moveBody(ToInsert.Body, MBB); + for (MachineBasicBlock *PredMBB : ToInsert.PredMBBs) { + // Delete instructions that were lowered from epilog + MachineInstr &BranchIns = + getBranchWithDest(*PredMBB, *ToInsert.SuccMBB); + auto EpilogIt = ++EpilogIterator(BranchIns.getIterator()); + while (!EpilogIt.isEnd()) + EpilogIt++->eraseFromBundle(); + } + + } else if (ToInsert.Body.size()) + CfgRewriteEntries.push_back(ToInsert); + } + + // Perform the journaled rewrites. + for (auto &Entry : CfgRewriteEntries) { + MachineBasicBlock *MezzanineMBB = MF.CreateMachineBasicBlock(); + MF.insert(MF.end(), MezzanineMBB); + + // Deal with mezzanine to successor succession. + BuildMI(MezzanineMBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)) + .addMBB(Entry.SuccMBB); + MezzanineMBB->addSuccessor(Entry.SuccMBB); + + // Move instructions to mezzanine block. 
+ moveBody(Entry.Body, *MezzanineMBB); + + for (MachineBasicBlock *PredMBB : Entry.PredMBBs) { + // Deal with predecessor to mezzanine succession. + MachineInstr &BranchIns = getBranchWithDest(*PredMBB, *Entry.SuccMBB); + assert(BranchIns.getOperand(0).isMBB() && "Branch instruction isn't."); + BranchIns.getOperand(0).setMBB(MezzanineMBB); + PredMBB->replaceSuccessor(Entry.SuccMBB, MezzanineMBB); + + // Delete instructions that were lowered from epilog + auto EpilogIt = ++EpilogIterator(BranchIns.getIterator()); + while (!EpilogIt.isEnd()) + EpilogIt++->eraseFromBundle(); + } + } +} + +namespace std { +template <> struct hash<Register> { + std::size_t operator()(const Register &R) const { + return hash<unsigned>()(R); + } +}; +} // namespace std + +static inline void hoistUnrelatedCopies(MachineFunction &MF) { + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &BranchMI : MBB) { + if (!BranchMI.isBranch()) + continue; + + unordered_set<Register> RelatedCopySources; + EpilogIterator EpilogIt = BranchMI.getIterator(); + EpilogIterator CopyMoveIt = ++EpilogIt; + while (!EpilogIt.isEnd()) { + if (EpilogIt->getOpcode() != AMDGPU::COPY) + RelatedCopySources.insert(EpilogIt->getOperand(0).getReg()); + ++EpilogIt; + } + + while (!CopyMoveIt.isEnd()) { + EpilogIterator Next = CopyMoveIt; + ++Next; + if ((CopyMoveIt->getOpcode() == AMDGPU::COPY && + !RelatedCopySources.count(CopyMoveIt->getOperand(1).getReg())) || + CopyMoveIt->getOpcode() == AMDGPU::IMPLICIT_DEF) { + MachineInstr &MIToMove = *CopyMoveIt; + MIToMove.removeFromBundle(); + MBB.insert(BranchMI.getIterator(), &MIToMove); + } + + CopyMoveIt = Next; + } + } +} + +static inline bool makeEverySuccessorBeBranchTarget(MachineFunction &MF) { + bool Changed = false; + auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo(); + for (MachineBasicBlock &MBB : MF) + if (MBB.empty() || + (!MBB.back().isUnconditionalBranch() && !MBB.back().isReturn())) { + MachineBasicBlock *LayoutSuccessor = + &*std::next(MachineFunction::iterator(MBB)); + BuildMI(&MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)) + .addMBB(LayoutSuccessor); + Changed = true; + } + + return Changed; +} diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 8586d6c18b361..64d46c5e401de 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -48,6 +48,7 @@ /// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits //===----------------------------------------------------------------------===// +#include "SICustomBranchBundles.h" #include "SILowerControlFlow.h" #include "AMDGPU.h" #include "AMDGPULaneMaskUtils.h" @@ -152,6 +153,14 @@ class SILowerControlFlowLegacy : public MachineFunctionPass { return "SI Lower control flow pseudo instructions"; } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().setIsSSA(); + } + + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().setNoPHIs(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addUsedIfAvailable<LiveIntervalsWrapperPass>(); // Should preserve the same set that TwoAddressInstructions does. 
@@ -322,6 +331,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { if (LV) LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec); + moveInsBeforePhis(*OrSaveExec); + MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); MachineBasicBlock::iterator ElsePt(MI); @@ -789,7 +800,7 @@ bool SILowerControlFlow::run(MachineFunction &MF) { } } - bool Changed = false; + bool Changed = makeEverySuccessorBeBranchTarget(MF); MachineFunction::iterator NextBB; for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); BI = NextBB) { @@ -839,6 +850,12 @@ bool SILowerControlFlow::run(MachineFunction &MF) { LoweredIf.clear(); KillBlocks.clear(); + if (Changed) + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : MBB) + if (MI.isBundled()) + MI.unbundleFromSucc(); + return Changed; } diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h index 23803c679c246..0f4df79952999 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h @@ -16,6 +16,14 @@ class SILowerControlFlowPass : public PassInfoMixin<SILowerControlFlowPass> { public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() const { + return MachineFunctionProperties().setIsSSA(); + } + + MachineFunctionProperties getClearedProperties() const { + return MachineFunctionProperties().setNoPHIs(); + } }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp new file mode 100644 index 0000000000000..d6d02c940731c --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp @@ -0,0 +1,46 @@ +#include "SICustomBranchBundles.h" +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "si-restore-normal-epilog" + +namespace +{ + +class SIRestoreNormalEpilogLegacy : public MachineFunctionPass { +public: + static char ID; + + SIRestoreNormalEpilogLegacy() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + hoistUnrelatedCopies(MF); + normalizeIrPostPhiElimination(MF); + return true; + } + + StringRef getPassName() const override { + return "SI Restore Normal Epilog Post PHI Elimination"; + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().setNoPHIs(); + } + +}; + +} // namespace + +INITIALIZE_PASS(SIRestoreNormalEpilogLegacy, DEBUG_TYPE, + "SI restore normal epilog", false, false) + +char SIRestoreNormalEpilogLegacy::ID; +char &llvm::SIRestoreNormalEpilogLegacyID = SIRestoreNormalEpilogLegacy::ID; |
@alex-t I've completed all action items for this, but there are some comments to which I've responded where you have not replied. Please let me know your thoughts.
…nt VREGs in the hoisted instructions and merging them with PHI.
This needs a proper description and motivation - what does the patch do and why?
Still needs tests and testing. Also needs camel case. If the snake case bothers anyone, I'll fix it now.