// Basic idea of the disassembly + discovery: // // start with the wanted address, insert it in the worklist // while worklist not empty, take next address in the worklist: // - check if atom exists there // - if middle of atom: // - split basic blocks referencing the atom // - look for an already encountered BBInfo (using a map<atom, bbinfo>) // - if there is, split it (new one, fallthrough, move succs, etc..) // - if start of atom: nothing else to do // - if no atom: create new atom and new bbinfo // - look at the last instruction in the atom, add succs to worklist // for all elements in the worklist: // - create basic block, update preds/succs, etc.. // void MCObjectDisassembler::disassembleFunctionAt( MCModule *Module, MCFunction *MCFN, uint64_t BBBeginAddr, AddressSetTy &CallTargets, AddressSetTy &TailCallTargets) { std::map<uint64_t, BBInfo> BBInfos; typedef SmallSetVector<uint64_t, 16> AddrWorklistTy; AddrWorklistTy Worklist; DEBUG(dbgs() << "Starting CFG at " << utohexstr(BBBeginAddr) << "\n"); Worklist.insert(BBBeginAddr); for (size_t wi = 0; wi < Worklist.size(); ++wi) { const uint64_t BeginAddr = Worklist[wi]; AddrPrettyStackTraceEntry X(BeginAddr, "Basic Block"); DEBUG(dbgs() << "Looking for block at " << utohexstr(BeginAddr) << "\n"); // Look for a BB at BeginAddr. auto BeforeIt = std::upper_bound( BBInfos.begin(), BBInfos.end(), BeginAddr, [](uint64_t Addr, const std::pair<uint64_t, BBInfo> &BBI) { return Addr < BBI.second.BeginAddr+BBI.second.SizeInBytes; }); assert((BeforeIt == BBInfos.end() || BeforeIt->first != BeginAddr) && "Visited same basic block twice!"); // Found a BB containing BeginAddr, we have to split it. 
if (BeforeIt != BBInfos.end() && BeforeIt->first < BeginAddr) { BBInfo &BeforeBB = BeforeIt->second; DEBUG(dbgs() << "Found block at " << utohexstr(BeforeBB.BeginAddr) << ", needs splitting at " << utohexstr(BeginAddr) << "\n"); assert(BeginAddr < BeforeBB.BeginAddr + BeforeBB.SizeInBytes && "Address isn't inside block?"); BBInfo &NewBB = BBInfos[BeginAddr]; NewBB.BeginAddr = BeginAddr; auto SplitInst = BeforeBB.Insts.end(); for (auto I = BeforeBB.Insts.begin(), E = BeforeBB.Insts.end(); I != E; ++I) { if (BeginAddr == I->Address) { SplitInst = I; break; } } assert(SplitInst != BeforeBB.Insts.end() && "Split point does not fall on an instruction boundary!"); // FIXME: use a list instead for free splicing? // Splice the remaining instructions to the new block. // While SplitInst is still valid, decrease the size to match. const uint64_t SplitOffset = SplitInst->Address - BeforeBB.BeginAddr; NewBB.SizeInBytes = BeforeBB.SizeInBytes - SplitOffset; BeforeBB.SizeInBytes = SplitOffset; // Now do the actual splicing out of BeforeBB. NewBB.Insts.insert(NewBB.Insts.begin(), SplitInst, BeforeBB.Insts.end()); BeforeBB.Insts.erase(SplitInst, BeforeBB.Insts.end()); // Move the successors to the new block. std::swap(NewBB.SuccAddrs, BeforeBB.SuccAddrs); BeforeBB.SuccAddrs.push_back(BeginAddr); } else { // If we didn't find a BB, then we have to disassemble to create one! const MemoryRegion &Region = getRegionFor(BeginAddr); if (Region.Bytes.empty()) report_fatal_error(("No suitable region for disassembly at 0x" + utohexstr(BeginAddr)).c_str()); const uint64_t EndRegion = Region.Addr + Region.Bytes.size(); uint64_t EndAddr = EndRegion; // We want to stop before the next BB and have a fallthrough to it. 
if (BeforeIt != BBInfos.end()) EndAddr = std::min(EndAddr, BeforeIt->first); BBInfo &BBI = BBInfos[BeginAddr]; BBI.BeginAddr = BeginAddr; assert(BBI.Insts.empty() && "Basic Block already exists!"); DEBUG(dbgs() << "No existing block found, starting disassembly from " << utohexstr(Region.Addr) << " to " << utohexstr(Region.Addr + Region.Bytes.size()) << "\n"); auto AddInst = [&](MCInst &I, uint64_t Addr, uint64_t Size) { const uint64_t NextAddr = BBI.BeginAddr + BBI.SizeInBytes; assert(NextAddr == Addr); BBI.Insts.emplace_back(I, NextAddr, Size); BBI.SizeInBytes += Size; }; uint64_t InstSize; for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) { MCInst Inst; if (Dis.getInstruction(Inst, InstSize, Region.Bytes.slice(Addr - Region.Addr), Addr, nulls(), nulls())) { AddInst(Inst, Addr, InstSize); } else { DEBUG(dbgs() << "Failed disassembly at " << utohexstr(Addr) << "!\n"); break; } uint64_t BranchTarget; if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { DEBUG(dbgs() << "Found branch to " << utohexstr(BranchTarget) << "!\n"); if (MIA.isCall(Inst)) { DEBUG(dbgs() << "Found call!\n"); CallTargets.push_back(BranchTarget); } } if (MIA.isTerminator(Inst)) { DEBUG(dbgs() << "Found terminator!\n"); // Now we have a complete basic block, add successors. // Add the fallthrough block, and mark it for visiting. if (MIA.isConditionalBranch(Inst)) { BBI.SuccAddrs.push_back(Addr + InstSize); Worklist.insert(Addr + InstSize); } // If the terminator is a branch, add the target block. if (MIA.isBranch(Inst)) { uint64_t BranchTarget; if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { StringRef ExtFnName; if (MOS && !(ExtFnName = MOS->findExternalFunctionAt(BranchTarget)) .empty()) { TailCallTargets.push_back(BranchTarget); CallTargets.push_back(BranchTarget); } else { BBI.SuccAddrs.push_back(BranchTarget); Worklist.insert(BranchTarget); } } } break; } } } } // First, create all blocks. 
for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { const uint64_t BeginAddr = Worklist[wi]; BBInfo *BBI = &BBInfos[BeginAddr]; MCBasicBlock *&MCBB = BBI->BB; MCBB = &MCFN->createBlock(BeginAddr); std::swap(MCBB->Insts, BBI->Insts); MCBB->InstCount = MCBB->Insts.size(); MCBB->SizeInBytes = BBI->SizeInBytes; } // Next, add all predecessors/successors. for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { const uint64_t BeginAddr = Worklist[wi]; BBInfo *BBI = &BBInfos[BeginAddr]; MCBasicBlock *&MCBB = BBI->BB; RemoveDupsFromAddressVector(BBI->SuccAddrs); for (uint64_t Address : BBI->SuccAddrs) { MCBasicBlock *Succ = BBInfos[Address].BB; assert(Succ && "Couldn't find block successor?!"); // FIXME: Sort the succs/preds at the end? MCBB->Successors.push_back(Succ); Succ->Predecessors.push_back(MCBB); } } }
/// Builds functions and basic blocks for \p Module from its existing text
/// atoms.
///
/// Steps, in order:
///  1. Collect function-symbol addresses from the object as call targets and
///     split points.
///  2. Walk every text atom: its begin address is a call target, the address
///     after each terminator is a split point, and every evaluated branch
///     target is a split point (and a call target when the instruction is a
///     call).
///  3. Split text atoms at the collected split points.
///  4. Compute successor/predecessor relations from each atom's last
///     instruction.
///  5. For every call target that has an atom, create a function and populate
///     it with the blocks reachable through successor and predecessor edges.
///
/// Asserts that \p Module has no functions yet.
void MCObjectDisassembler::buildCFG(MCModule *Module) {
  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
  BBInfoByAddrTy BBInfos;
  AddressSetTy Splits;
  AddressSetTy Calls;

  error_code ec;
  for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
       SI != SE; SI.increment(ec)) {
    if (ec)
      break;
    // Skip symbols whose type or address cannot be queried, instead of
    // reading an uninitialized out-parameter.
    SymbolRef::Type SymType;
    if (SI->getType(SymType))
      continue;
    if (SymType == SymbolRef::ST_Function) {
      uint64_t SymAddr;
      if (SI->getAddress(SymAddr))
        continue;
      SymAddr = getEffectiveLoadAddr(SymAddr);
      Calls.push_back(SymAddr);
      Splits.push_back(SymAddr);
    }
  }

  assert(Module->func_begin() == Module->func_end() &&
         "Module already has a CFG!");

  // First, determine the basic block boundaries and call targets.
  for (MCModule::atom_iterator AI = Module->atom_begin(),
                               AE = Module->atom_end();
       AI != AE; ++AI) {
    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
    if (!TA)
      continue;
    Calls.push_back(TA->getBeginAddr());
    BBInfos[TA->getBeginAddr()].Atom = TA;
    for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
         II != IE; ++II) {
      // A new block starts right after every terminator.
      if (MIA.isTerminator(II->Inst))
        Splits.push_back(II->Address + II->Size);
      uint64_t Target;
      if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
        if (MIA.isCall(II->Inst))
          Calls.push_back(Target);
        Splits.push_back(Target);
      }
    }
  }

  RemoveDupsFromAddressVector(Splits);
  RemoveDupsFromAddressVector(Calls);

  // Split text atoms into basic block atoms.
  for (uint64_t SplitAddr : Splits) {
    MCAtom *A = Module->findAtomContaining(SplitAddr);
    if (!A)
      continue;
    MCTextAtom *TA = cast<MCTextAtom>(A);
    // Already a block boundary, nothing to split.
    if (TA->getBeginAddr() == SplitAddr)
      continue;
    MCTextAtom *NewAtom = TA->split(SplitAddr);
    BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
    // Name the new atom "<base>:<hex address>", where <base> is the original
    // atom's name up to its last ':' (the whole name if it has none).
    StringRef BBName = TA->getName();
    BBName = BBName.substr(0, BBName.find_last_of(':'));
    NewAtom->setName((BBName + ":" + utohexstr(SplitAddr)).str());
  }

  // Compute succs/preds.
  for (MCModule::atom_iterator AI = Module->atom_begin(),
                               AE = Module->atom_end();
       AI != AE; ++AI) {
    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
    if (!TA)
      continue;
    // An atom without instructions has no last instruction to inspect.
    if (TA->begin() == TA->end())
      continue;
    BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
    const MCDecodedInst &LI = TA->back();
    if (MIA.isBranch(LI.Inst)) {
      uint64_t Target;
      if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
        CurBB.addSucc(BBInfos[Target]);
      // Conditional branches also fall through to the next address.
      if (MIA.isConditionalBranch(LI.Inst))
        CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
    } else if (!MIA.isTerminator(LI.Inst))
      CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
  }

  // Create functions and basic blocks.
  for (uint64_t CallAddr : Calls) {
    BBInfo &EntryBBI = BBInfos[CallAddr];
    // Call targets with no atom behind them cannot become functions.
    if (!EntryBBI.Atom)
      continue;

    MCFunction &MCFN = *Module->createFunction(EntryBBI.Atom->getName());

    // Create MCBBs. The worklist grows while it is being walked.
    SmallSetVector<BBInfo *, 16> Worklist;
    Worklist.insert(&EntryBBI);
    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
      BBInfo *CurBBI = Worklist[wi];
      if (!CurBBI->Atom)
        continue;
      CurBBI->BB = &MCFN.createBlock(*CurBBI->Atom);
      // Add all predecessors and successors to the worklist.
      for (BBInfoSetTy::iterator SI = CurBBI->Succs.begin(),
                                 SE = CurBBI->Succs.end();
           SI != SE; ++SI)
        Worklist.insert(*SI);
      for (BBInfoSetTy::iterator PI = CurBBI->Preds.begin(),
                                 PE = CurBBI->Preds.end();
           PI != PE; ++PI)
        Worklist.insert(*PI);
    }

    // Set preds/succs. Entries that never got a block (no atom) are skipped,
    // both as the source and as the target of an edge.
    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
      BBInfo *CurBBI = Worklist[wi];
      MCBasicBlock *MCBB = CurBBI->BB;
      if (!MCBB)
        continue;
      for (BBInfoSetTy::iterator SI = CurBBI->Succs.begin(),
                                 SE = CurBBI->Succs.end();
           SI != SE; ++SI)
        if ((*SI)->BB)
          MCBB->addSuccessor((*SI)->BB);
      for (BBInfoSetTy::iterator PI = CurBBI->Preds.begin(),
                                 PE = CurBBI->Preds.end();
           PI != PE; ++PI)
        if ((*PI)->BB)
          MCBB->addPredecessor((*PI)->BB);
    }
  }
}