// Basic idea of the disassembly + discovery: // // start with the wanted address, insert it in the worklist // while worklist not empty, take next address in the worklist: // - check if atom exists there // - if middle of atom: // - split basic blocks referencing the atom // - look for an already encountered BBInfo (using a map<atom, bbinfo>) // - if there is, split it (new one, fallthrough, move succs, etc..) // - if start of atom: nothing else to do // - if no atom: create new atom and new bbinfo // - look at the last instruction in the atom, add succs to worklist // for all elements in the worklist: // - create basic block, update preds/succs, etc.. // void MCObjectDisassembler::disassembleFunctionAt( MCModule *Module, MCFunction *MCFN, uint64_t BBBeginAddr, AddressSetTy &CallTargets, AddressSetTy &TailCallTargets) { std::map<uint64_t, BBInfo> BBInfos; typedef SmallSetVector<uint64_t, 16> AddrWorklistTy; AddrWorklistTy Worklist; DEBUG(dbgs() << "Starting CFG at " << utohexstr(BBBeginAddr) << "\n"); Worklist.insert(BBBeginAddr); for (size_t wi = 0; wi < Worklist.size(); ++wi) { const uint64_t BeginAddr = Worklist[wi]; AddrPrettyStackTraceEntry X(BeginAddr, "Basic Block"); DEBUG(dbgs() << "Looking for block at " << utohexstr(BeginAddr) << "\n"); // Look for a BB at BeginAddr. auto BeforeIt = std::upper_bound( BBInfos.begin(), BBInfos.end(), BeginAddr, [](uint64_t Addr, const std::pair<uint64_t, BBInfo> &BBI) { return Addr < BBI.second.BeginAddr+BBI.second.SizeInBytes; }); assert((BeforeIt == BBInfos.end() || BeforeIt->first != BeginAddr) && "Visited same basic block twice!"); // Found a BB containing BeginAddr, we have to split it. if (BeforeIt != BBInfos.end() && BeforeIt->first < BeginAddr) { BBInfo &BeforeBB = BeforeIt->second; DEBUG(dbgs() << "Found block at " << utohexstr(BeforeBB.BeginAddr) << ", needs splitting at " << utohexstr(BeginAddr) << "\n"); assert(BeginAddr < BeforeBB.BeginAddr + BeforeBB.SizeInBytes && "Address isn't inside block?"); BBInfo &NewBB = BBInfos[BeginAddr]; NewBB.BeginAddr = BeginAddr; auto SplitInst = BeforeBB.Insts.end(); for (auto I = BeforeBB.Insts.begin(), E = BeforeBB.Insts.end(); I != E; ++I) { if (BeginAddr == I->Address) { SplitInst = I; break; } } assert(SplitInst != BeforeBB.Insts.end() && "Split point does not fall on an instruction boundary!"); // FIXME: use a list instead for free splicing? // Splice the remaining instructions to the new block. // While SplitInst is still valid, decrease the size to match. const uint64_t SplitOffset = SplitInst->Address - BeforeBB.BeginAddr; NewBB.SizeInBytes = BeforeBB.SizeInBytes - SplitOffset; BeforeBB.SizeInBytes = SplitOffset; // Now do the actual splicing out of BeforeBB. NewBB.Insts.insert(NewBB.Insts.begin(), SplitInst, BeforeBB.Insts.end()); BeforeBB.Insts.erase(SplitInst, BeforeBB.Insts.end()); // Move the successors to the new block. std::swap(NewBB.SuccAddrs, BeforeBB.SuccAddrs); BeforeBB.SuccAddrs.push_back(BeginAddr); } else { // If we didn't find a BB, then we have to disassemble to create one! const MemoryRegion &Region = getRegionFor(BeginAddr); if (Region.Bytes.empty()) report_fatal_error(("No suitable region for disassembly at 0x" + utohexstr(BeginAddr)).c_str()); const uint64_t EndRegion = Region.Addr + Region.Bytes.size(); uint64_t EndAddr = EndRegion; // We want to stop before the next BB and have a fallthrough to it. if (BeforeIt != BBInfos.end()) EndAddr = std::min(EndAddr, BeforeIt->first); BBInfo &BBI = BBInfos[BeginAddr]; BBI.BeginAddr = BeginAddr; assert(BBI.Insts.empty() && "Basic Block already exists!"); DEBUG(dbgs() << "No existing block found, starting disassembly from " << utohexstr(Region.Addr) << " to " << utohexstr(Region.Addr + Region.Bytes.size()) << "\n"); auto AddInst = [&](MCInst &I, uint64_t Addr, uint64_t Size) { const uint64_t NextAddr = BBI.BeginAddr + BBI.SizeInBytes; assert(NextAddr == Addr); BBI.Insts.emplace_back(I, NextAddr, Size); BBI.SizeInBytes += Size; }; uint64_t InstSize; for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) { MCInst Inst; if (Dis.getInstruction(Inst, InstSize, Region.Bytes.slice(Addr - Region.Addr), Addr, nulls(), nulls())) { AddInst(Inst, Addr, InstSize); } else { DEBUG(dbgs() << "Failed disassembly at " << utohexstr(Addr) << "!\n"); break; } uint64_t BranchTarget; if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { DEBUG(dbgs() << "Found branch to " << utohexstr(BranchTarget) << "!\n"); if (MIA.isCall(Inst)) { DEBUG(dbgs() << "Found call!\n"); CallTargets.push_back(BranchTarget); } } if (MIA.isTerminator(Inst)) { DEBUG(dbgs() << "Found terminator!\n"); // Now we have a complete basic block, add successors. // Add the fallthrough block, and mark it for visiting. if (MIA.isConditionalBranch(Inst)) { BBI.SuccAddrs.push_back(Addr + InstSize); Worklist.insert(Addr + InstSize); } // If the terminator is a branch, add the target block. if (MIA.isBranch(Inst)) { uint64_t BranchTarget; if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { StringRef ExtFnName; if (MOS && !(ExtFnName = MOS->findExternalFunctionAt(BranchTarget)) .empty()) { TailCallTargets.push_back(BranchTarget); CallTargets.push_back(BranchTarget); } else { BBI.SuccAddrs.push_back(BranchTarget); Worklist.insert(BranchTarget); } } } break; } } } } // First, create all blocks. for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { const uint64_t BeginAddr = Worklist[wi]; BBInfo *BBI = &BBInfos[BeginAddr]; MCBasicBlock *&MCBB = BBI->BB; MCBB = &MCFN->createBlock(BeginAddr); std::swap(MCBB->Insts, BBI->Insts); MCBB->InstCount = MCBB->Insts.size(); MCBB->SizeInBytes = BBI->SizeInBytes; } // Next, add all predecessors/successors. for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { const uint64_t BeginAddr = Worklist[wi]; BBInfo *BBI = &BBInfos[BeginAddr]; MCBasicBlock *&MCBB = BBI->BB; RemoveDupsFromAddressVector(BBI->SuccAddrs); for (uint64_t Address : BBI->SuccAddrs) { MCBasicBlock *Succ = BBInfos[Address].BB; assert(Succ && "Couldn't find block successor?!"); // FIXME: Sort the succs/preds at the end? MCBB->Successors.push_back(Succ); Succ->Predecessors.push_back(MCBB); } } }
// Basic idea of the disassembly + discovery: // // start with the wanted address, insert it in the worklist // while worklist not empty, take next address in the worklist: // - check if atom exists there // - if middle of atom: // - split basic blocks referencing the atom // - look for an already encountered BBInfo (using a map<atom, bbinfo>) // - if there is, split it (new one, fallthrough, move succs, etc..) // - if start of atom: nothing else to do // - if no atom: create new atom and new bbinfo // - look at the last instruction in the atom, add succs to worklist // for all elements in the worklist: // - create basic block, update preds/succs, etc.. // MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BBBeginAddr, AddressSetTy &CallTargets, AddressSetTy &TailCallTargets) { typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; typedef SmallSetVector<uint64_t, 16> AddrWorklistTy; BBInfoByAddrTy BBInfos; AddrWorklistTy Worklist; Worklist.insert(BBBeginAddr); for (size_t wi = 0; wi < Worklist.size(); ++wi) { const uint64_t BeginAddr = Worklist[wi]; BBInfo *BBI = &BBInfos[BeginAddr]; MCTextAtom *&TA = BBI->Atom; assert(!TA && "Discovered basic block already has an associated atom!"); // Look for an atom at BeginAddr. if (MCAtom *A = Module->findAtomContaining(BeginAddr)) { // FIXME: We don't care about mixed atoms, see above. TA = cast<MCTextAtom>(A); // The found atom doesn't begin at BeginAddr, we have to split it. if (TA->getBeginAddr() != BeginAddr) { // FIXME: Handle overlapping atoms: middle-starting instructions, etc.. MCTextAtom *NewTA = TA->split(BeginAddr); // Look for an already encountered basic block that needs splitting BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr()); if (It != BBInfos.end() && It->second.Atom) { BBI->SuccAddrs = It->second.SuccAddrs; It->second.SuccAddrs.clear(); It->second.SuccAddrs.push_back(BeginAddr); } TA = NewTA; } BBI->Atom = TA; } else { // If we didn't find an atom, then we have to disassemble to create one! MemoryObject *Region = getRegionFor(BeginAddr); if (!Region) llvm_unreachable(("Couldn't find suitable region for disassembly at " + utostr(BeginAddr)).c_str()); uint64_t InstSize; uint64_t EndAddr = Region->getBase() + Region->getExtent(); // We want to stop before the next atom and have a fallthrough to it. if (MCTextAtom *NextAtom = cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr))) EndAddr = std::min(EndAddr, NextAtom->getBeginAddr()); for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) { MCInst Inst; if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(), nulls())) { if (!TA) TA = Module->createTextAtom(Addr, Addr); TA->addInst(Inst, InstSize); } else { // We don't care about splitting mixed atoms either. llvm_unreachable("Couldn't disassemble instruction in atom."); } uint64_t BranchTarget; if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { if (MIA.isCall(Inst)) CallTargets.push_back(BranchTarget); } if (MIA.isTerminator(Inst)) break; } BBI->Atom = TA; } assert(TA && "Couldn't disassemble atom, none was created!"); assert(TA->begin() != TA->end() && "Empty atom!"); MemoryObject *Region = getRegionFor(TA->getBeginAddr()); assert(Region && "Couldn't find region for already disassembled code!"); uint64_t EndRegion = Region->getBase() + Region->getExtent(); // Now we have a basic block atom, add successors. // Add the fallthrough block. if ((MIA.isConditionalBranch(TA->back().Inst) || !MIA.isTerminator(TA->back().Inst)) && (TA->getEndAddr() + 1 < EndRegion)) { BBI->SuccAddrs.push_back(TA->getEndAddr() + 1); Worklist.insert(TA->getEndAddr() + 1); } // If the terminator is a branch, add the target block. if (MIA.isBranch(TA->back().Inst)) { uint64_t BranchTarget; if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address, TA->back().Size, BranchTarget)) { StringRef ExtFnName; if (MOS) ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget)); if (!ExtFnName.empty()) { TailCallTargets.push_back(BranchTarget); CallTargets.push_back(BranchTarget); } else { BBI->SuccAddrs.push_back(BranchTarget); Worklist.insert(BranchTarget); } } } } for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { const uint64_t BeginAddr = Worklist[wi]; BBInfo *BBI = &BBInfos[BeginAddr]; assert(BBI->Atom && "Found a basic block without an associated atom!"); // Look for a basic block at BeginAddr. BBI->BB = MCFN->find(BeginAddr); if (BBI->BB) { // FIXME: check that the succs/preds are the same continue; } // If there was none, we have to create one from the atom. BBI->BB = &MCFN->createBlock(*BBI->Atom); } for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { const uint64_t BeginAddr = Worklist[wi]; BBInfo *BBI = &BBInfos[BeginAddr]; MCBasicBlock *BB = BBI->BB; RemoveDupsFromAddressVector(BBI->SuccAddrs); for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(), SE = BBI->SuccAddrs.end(); SE != SE; ++SI) { MCBasicBlock *Succ = BBInfos[*SI].BB; BB->addSuccessor(Succ); Succ->addPredecessor(BB); } } assert(BBInfos[Worklist[0]].BB && "No basic block created at requested address?"); return BBInfos[Worklist[0]].BB; }