// Disassemble the instructions in each procedure void BinUtil::TextSeg::ctor_disassembleProcs() { // ------------------------------------------------------------ // Disassemble the instructions in each procedure. // ------------------------------------------------------------ VMA sectionBase = begVMA(); for (ProcVec::iterator it = m_procs.begin(); it != m_procs.end(); ++it) { Proc* p = *it; VMA procBeg = p->begVMA(); VMA procEnd = p->endVMA(); ushort insnSz = 0; VMA lastInsnVMA = procBeg; // vma of last valid instruction in the proc // Iterate over each vma at which an instruction might begin for (VMA vma = procBeg; vma < procEnd; ) { MachInsn *mi = &(m_contents[vma - sectionBase]); insnSz = LM::isa->getInsnSize(mi); if (insnSz == 0) { // This is not a recognized instruction (cf. data on CISC ISAs). ++vma; // Increment the VMA, and try to decode again. continue; } int num_ops = LM::isa->getInsnNumOps(mi); if (num_ops == 0) { // This instruction contains data. No need to decode. vma += insnSz; continue; } // We have a valid instruction at this vma! lastInsnVMA = vma; for (ushort opIndex = 0; opIndex < num_ops; opIndex++) { Insn *newInsn = makeInsn(m_lm->abfd(), mi, vma, opIndex, insnSz); m_lm->insertInsn(vma, opIndex, newInsn); } vma += insnSz; } // 'insnSz' is now the size of the last instruction or 0 // Now we can update the procedure's end address and size since we // know where the last instruction begins. The procedure's // original end address was guessed to be the begin address of the // following procedure while determining all procedures above. p->endVMA(lastInsnVMA); p->size(p->endVMA() - p->begVMA() + insnSz); } }
void BinUtil::TextSeg::ctor_initProcs() { Dbg::LM* dbgInfo = m_lm->getDebugInfo(); // Any procedure with a parent has a <Proc*, parentVMA> entry std::map<Proc*, VMA> parentMap; // ------------------------------------------------------------ // Each text section finds and creates its own routines. // Traverse the symbol table (which is sorted by VMA) searching // for function symbols in our section. Create a Proc for // each one found. // // Note that symbols can appear multiple times (e.g. a weak symbol // 'sbrk' along with a gloabl symbol '__sbrk'), but we should not // have multiple procedures. // ------------------------------------------------------------ bfd* abfd = m_lm->abfd(); asymbol** symtab = m_lm->bfdSymTab(); // sorted uint symtabSz = m_lm->bfdSymTabSz(); // FIXME:PERF: exploit sortedness of 'symtab' to start iteration for (uint i = 0; i < symtabSz; i++) { asymbol* sym = symtab[i]; if (isIn(bfd_asymbol_value(sym)) && Proc::isProcBFDSym(sym)) { // NOTE: initially we have [begVMA, endVMA) where endVMA is the // *end* of the last insn. This is changed after decoding below. VMA begVMA = bfd_asymbol_value(sym); VMA endVMA = 0; Proc::Type procType; if (sym->flags & BSF_LOCAL) { procType = Proc::Local; } else if (sym->flags & BSF_WEAK) { procType = Proc::Weak; } else if (sym->flags & BSF_GLOBAL) { procType = Proc::Global; } else { procType = Proc::Unknown; } Proc* proc = m_lm->findProc(begVMA); if (proc) { DIAG_Assert(proc->begVMA() == begVMA, "TextSeg::ctor_initProcs: Procedure beginning at 0x" << hex << begVMA << " overlaps with:\n" << proc->toString()); if (procType == Proc::Global) { // 'global' types take precedence proc->type(procType); } continue; } // Create a procedure based on best information we have. We // always prefer explicit debug information over that inferred // from the symbol table. string procNm; string symNm = bfd_asymbol_name(sym); Dbg::LM::iterator it = dbgInfo->find(begVMA); Dbg::Proc* dbg = (it != dbgInfo->end()) ? it->second : NULL; if (!dbg) { procNm = findProcName(abfd, sym); string pnm = BinUtil::canonicalizeProcName(procNm); Dbg::LM::iterator1 it1 = dbgInfo->find1(pnm); dbg = (it1 != dbgInfo->end1()) ? it1->second : NULL; } if (!dbg) { Dbg::LM::iterator1 it1 = dbgInfo->find1(symNm); dbg = (it1 != dbgInfo->end1()) ? it1->second : NULL; } // Finding the end VMA (end of last insn). The computation is // as follows because sometimes the debug information is // *wrong*. (Intel 9 has generated significant over-estimates). // // N.B. exploits the fact that the symbol table is sorted by vma VMA endVMA_approx = findProcEnd(i); if (dbg) { if (!dbg->name.empty()) { procNm = dbg->name; } else if (!symNm.empty()) { // sometimes a procedure name is in the symbol table even // though it is not in the dwarf section. this case occurs // when gcc outlines routines from OpenMP parallel sections. procNm = symNm; } #if 1 // Remove capability below... the DWARF sizes can be wrong!! endVMA = endVMA_approx; #else endVMA = std::min(dbg->endVMA, endVMA_approx); if (endVMA != endVMA_approx) { int64_t diff = endVMA - endVMA_approx; DIAG_DevMsg(0, procNm << ": inconsistent end VMA: " << diff << " [" << std::showbase << std::hex << begVMA << "-" << endVMA << "/" << endVMA_approx << std::dec << "]"); } #endif } if (!dbg || endVMA == 0) { endVMA = endVMA_approx; } uint size = endVMA - begVMA; if (size == 0) { continue; } // We now have a valid procedure. Initilize with [begVMA, endVMA), // but note this is changed after disassembly. proc = new Proc(this, procNm, symNm, procType, begVMA, endVMA, size); m_procs.push_back(proc); m_lm->insertProc(VMAInterval(begVMA, endVMA), proc); // Add symbolic info if (dbg) { proc->filename(dbg->filenm); proc->begLine(dbg->begLine); if (dbg->parent) { parentMap.insert(std::make_pair(proc, dbg->parent->begVMA)); } } } } // ------------------------------------------------------------ // If a text section does not have any function symbols, consider // the whole section a quasi procedure // ------------------------------------------------------------ if (numProcs() == 0) { // [begVMA, endVMA) Proc* proc = new Proc(this, name(), name(), Proc::Quasi, begVMA(), endVMA(), size()); m_procs.push_back(proc); m_lm->insertProc(VMAInterval(begVMA(), endVMA()), proc); } // ------------------------------------------------------------ // Embed parent information // ------------------------------------------------------------ for (std::map<Proc*, VMA>::iterator it = parentMap.begin(); it != parentMap.end(); ++it) { Proc* child = it->first; VMA parentVMA = it->second; Proc* parent = m_lm->findProc(parentVMA); DIAG_AssertWarn(parent, "Could not find parent within this section:\n" << child->toString()); if (parent == child) { DIAG_WMsg(0, "Procedure has itself as parent!\n" << child->toString()); continue; // skip } child->parent(parent); } }