CFG 源码分析9.0


  SIMPLIFY CFG 源码分析 9.0 Some useful command to show Result Opt 工具显示优化先后的IR opt -O2 -print-before-all -print-after-all is_sorted2.ll Opt 运行单独的优化pass opt -simplifycfg a.ll -S -o aftercfg.ll Opt 显示控制流图 opt -dot-cfg a.ll dot mian.dot -Tpng -o a.png xdg-open a.png Debug 时候F->ViewCFG(); 显示函数的控制流图 BB 块的前驱 pred ,控制流图节点的入边 BB 块的后继 succ, 控制流节点的出边 Opt 打印统计信息 opt -stats -simplifycfg a.ll -S -o aftercfg.ll bb块的数据结构   1,函数入口 它依赖AssumptionAnalysis。 PreservedAnalyses SimplifyCFGPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult(F); Options.AC = &AM.getResult(F); if (!simplifyFunctionCFG(F, TTI, Options)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); return PA; } 2,simple CFG的函数主体 A, 检测可达性,去掉不可达的bb块。 B,多个return 语句跳转到一个,增加一个phi指令。 C,迭代优化CFG图。 static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, const SimplifyCFGOptions &Options) { bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); EverChanged |= iterativelySimplifyCFG(F, TTI, Options);   // If neither pass changed anything, we're done. if (!EverChanged) return false;   // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, // removeUnreachableBlocks is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to // avoid rerunning iterativelySimplifyCFG if the second pass of // removeUnreachableBlocks doesn't do anything. if (!removeUnreachableBlocks(F)) return true;   do { EverChanged = iterativelySimplifyCFG(F, TTI, Options); EverChanged |= removeUnreachableBlocks(F); } while (EverChanged);   return true; }   去除不可达块: 1,从Function entry 开始,检测可达块。 2,遍历Funtion 所有块,如果不是可达快,则删除。 llvm/Local.cpp at release_90 · llvm-mirror/llvm · GitHub removeUnreachableBlocks bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU) { SmallPtrSet Reachable; bool Changed = markAliveBlocks(F, Reachable, DTU);   // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) return Changed;   assert(Reachable.size() < F.size()); NumRemoved += F.size()-Reachable.size();   SmallSetVector DeadBlockSet; for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { auto *BB = &*I; if (Reachable.count(BB)) continue; DeadBlockSet.insert(BB); }       // Loop over all of the basic blocks that are not reachable, dropping all of // their internal references. Update DTU and LVI if available. std::vector Updates; for (auto *BB : DeadBlockSet) { for (BasicBlock *Successor : successors(BB)) { if (!DeadBlockSet.count(Successor)) Successor->removePredecessor(BB); if (DTU) Updates.push_back({DominatorTree::Delete, BB, Successor}); } if (LVI) LVI->eraseBlock(BB); BB->dropAllReferences(); } for (Function::iterator I = ++F.begin(); I != F.end();) { auto *BB = &*I; if (Reachable.count(BB)) { ++I; continue; } if (DTU) { // Remove the terminator of BB to clear the successor list of BB. if (BB->getTerminator()) BB->getInstList().pop_back(); new UnreachableInst(BB->getContext(), BB); assert(succ_empty(BB) && "The successor list of BB isn't empty before " "applying corresponding DTU updates."); ++I; } else { I = F.getBasicBlockList().erase(I); } }   return true; }   static bool markAliveBlocks(Function &F, SmallPtrSetImpl &Reachable, DomTreeUpdater *DTU = nullptr) { SmallVector Worklist; BasicBlock *BB = &F.front(); Worklist.push_back(BB); Reachable.insert(BB); bool Changed = false; do { BB = Worklist.pop_back_val(); // here I delete some code dealing other things //**// for (BasicBlock *Successor : successors(BB)) if (Reachable.insert(Successor).second) Worklist.push_back(Successor); } while (!Worklist.empty()); return Changed; } 实例分析 源码 int main(){ int i ,j; for (int i = 0 ; i < 10 ; i++) printf("%d", j); return 0; return 1; }   %0 %4 %7 合并多个ret分支 mergeEmptyReturnBlocks examples:   int main(){ int i , j ; j = 5; i = rand()%3; if (i == 0) return i; else if (i ==1) return j; else return 3; return 0; }     iterativelySimplifyCFG(F, TTI, Options); static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, const SimplifyCFGOptions &Options) { bool Changed = false; bool LocalChange = true;   SmallVector, 32> Edges; FindFunctionBackedges(F, Edges); SmallPtrSet LoopHeaders; for (unsigned i = 0, e = Edges.size(); i != e; ++i) LoopHeaders.insert(const_cast(Edges[i].second));   while (LocalChange) { LocalChange = false;   // Loop over all of the basic blocks and remove them if they are unneeded. for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { if (simplifyCFG(&*BBIt++, TTI, Options, &LoopHeaders)) { LocalChange = true; ++NumSimpl; } } Changed |= LocalChange; } return 对一个CFG 寻找回边。拓扑排序. Stack/set visited block Vector edge 7-->4 0 0-->4 0-->4--->7 0 -->4-->7-->4 find an loop 4 <--->7 0 -->4-->12 0 -->4 0 bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { bool Changed = false;   assert(BB && BB->getParent() && "Block not embedded in function!"); assert(BB->getTerminator() && "Degenerate basic block encountered!");   // Remove basic blocks that have no predecessors (except the entry block)... // or that just have themself as a predecessor. These are unreachable. if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || BB->getSinglePredecessor() == BB) { LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB); DeleteDeadBlock(BB); return true; }   // Check to see if we can constant propagate this terminator instruction // away... // 常量跳转,当一个bb块最后是一个条件跳转语句,并且是条件是一个常数的时候。 // 将条件跳转转化成无条件跳转。删除无法到达的跳转的bb块的前驱,并更新phi节点。 // 如果两个目标跳转是同一个dest块,也在此处理。 // bb1 : Terminator br 1 : dest : old // dest : ... pred:bb1 // old : pred:bb1, dest // t = phi [1,bb1],[2,dest] ... // ===> // bb1 : br dest // dest : ... pred:bb1 // old : pred: dest // t = phi [2,dest] // Changed |= ConstantFoldTerminator(BB, true);   // Check for and eliminate duplicate PHI nodes in this block. // 一个哈希去重,只是incomeming不同的phinode Changed |= EliminateDuplicatePHINodes(BB);   // Check for and remove branches that will always cause undefined behavior. Changed |= removeUndefIntroducingPredecessor(BB);   // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. // 如果只有一个确定的前驱节点,并且该前驱只有一个确定的后继,那么把自己融合到前驱。 // a-->b merge // a-->b--->c // d--/ can not merge // a--->b-->c // \->d can not merge if (MergeBlockIntoPredecessor(BB)) return true;   if (SinkCommon && Options.SinkCommonInsts) Changed |= SinkCommonCodeFromPredecessors(BB);   IRBuilder<> Builder(BB);   // If there is a trivial two-entry PHI node in this basic block, and we can // eliminate it, do so now. if (auto *PN = dyn_cast(BB->begin())) if (PN->getNumIncomingValues() == 2) // 将if-else 造成的phi 语句改变成select Changed |= FoldTwoEntryPHINode(PN, TTI, DL);   Builder.SetInsertPoint(BB->getTerminator()); if (auto *BI = dyn_cast(BB->getTerminator())) { if (BI->isUnconditional()) { // 单指令的cmp上移,phi增加incoming /// This is called when we find an icmp instruction /// (a seteq/setne with a constant) as the only instruction in a /// block that ends with an uncond branch. We are looking for a very specific /// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In /// this case, we merge the first two "or's of icmp" into a switch, but then the /// default value goes to an uncond block with a seteq in it, we get something /// like: /// /// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] /// DEFAULT: /// %tmp = icmp eq i8 %A, 92 /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] /// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. if (SimplifyUncondBranch(BI, Builder)) return true; } else { if (SimplifyCondBranch(BI, Builder)) return true; } } else if (auto *RI = dyn_cast(BB->getTerminator())) { if (SimplifyReturn(RI, Builder)) return true; } else if (auto *RI = dyn_cast(BB->getTerminator())) { if (SimplifyResume(RI, Builder)) return true; } else if (auto *RI = dyn_cast(BB->getTerminator())) { // 异常处理的代码,类回收 if (SimplifyCleanupReturn(RI)) return true; } else if (auto *SI = dyn_cast(BB->getTerminator())) { if (SimplifySwitch(SI, Builder)) return true; } else if (auto *UI = dyn_cast(BB->getTerminator())) { //移除不可达代码 例如assert(0);解引用空指针的后的代码 if (SimplifyUnreachable(UI)) return true; } else if (auto *IBI = dyn_cast(BB->getTerminator())) { if (SimplifyIndirectBr(IBI)) return true; }   return Changed; }   关于CFG的插入位置 关于CFG执行的次序。 SROA+CFG 消除6个块 CFG只能消除2个块   CFG 和 我们delay/@/wait/fork-join/neb 切割的想法   1, 我们的delay切割是阻塞的,不是立即跳转。不是一个立即的控制流。通过schedule 链接。不是call 语句。这种bb块是不可以简化切割的。不可以合并在一起。 stmt1 #1 stmt2   stmt1 ==》 stmt2 2, 对于while/if 的切割,这是单纯的控制流分析,这种控制流是可以简化的。