/// Reports whether `address` is the target of a relocation in section `sr`.
///
/// Every relocation entry of `sr` is visited. An entry matches when `offt`
/// plus the entry's address equals `address`. A matching entry counts as
/// "relocated" when any of the following holds:
///   - its relocation type name is "R_386_32" or "R_386_PC32";
///   - its symbol has type ST_Data and carries the SF_Global flag;
///   - its symbol has type ST_Other or ST_Unknown.
///
/// Any error returned by the object-file accessors trips LASSERT.
///
/// @param sr      section whose relocation entries are scanned
/// @param offt    value added to each relocation address before comparing
/// @param address address being looked up
/// @return true if a matching relocation satisfies one of the rules above,
///         false otherwise
static bool isAddrRelocated(const object::SectionRef &sr, uint32_t offt, VA address) {
  llvm::object::relocation_iterator rit = sr.begin_relocations();
  llvm::error_code e;

  while (rit != sr.end_relocations()) {
    llvm::object::RelocationRef rref = *rit;
    llvm::object::SymbolRef symref;

    // Read into a genuine 64-bit temporary rather than reinterpreting a VA
    // through a reference cast.
    ::uint64_t rawAddr = 0;
    e = rref.getAddress(rawAddr);
    LASSERT(!e, e.message());
    const VA addr = static_cast<VA>(rawAddr);

    e = rref.getSymbol(symref);
    LASSERT(!e, e.message());

    if (address == (offt + addr)) {
      llvm::object::SymbolRef::Type t;
      uint32_t flag = 0;

      // Check and see if the symbol's type is a global ...
      e = symref.getType(t);
      LASSERT(!e, e.message());
      e = symref.getFlags(flag);
      LASSERT(!e, e.message());

      SmallString<32> relocType;
      e = rref.getTypeName(relocType);
      LASSERT(!e, e.message());

      // Shortcut for ELF relocations by type.
      // TODO: move this to ELF specific code
      if (relocType == "R_386_32" || relocType == "R_386_PC32") {
        return true;
      }

      const bool isData = (t == llvm::object::SymbolRef::ST_Data);
      // Test the flag with '&'. OR-ing with a non-zero constant is always
      // non-zero, which made the "global" requirement a no-op.
      const bool isGlobal = (flag & llvm::object::SymbolRef::SF_Global) != 0;

      if ((isData && isGlobal) ||
          (t == llvm::object::SymbolRef::ST_Other) ||
          (t == llvm::object::SymbolRef::ST_Unknown)) {
        return true;
      }
    }

    rit.increment(e);
    LASSERT(!e, e.message());
  }

  return false;
}
static bool find_import_for_addr(object::SectionRef section, uint32_t offt, uint32_t target, std::string &import_name) { llvm::object::relocation_iterator rit = section.relocation_begin(); std::error_code ec; while( rit != section.relocation_end() ) { llvm::object::SymbolRef symref; VA addr = 0; ec = rit->getAddress((::uint64_t &)addr); LASSERT(!ec, "Can't get address for relocation ref"); llvm::dbgs() << "\t" << __FUNCTION__ << ": Testing " << to_string<VA>(target, hex) << " vs. " << to_string<VA>(addr+offt, hex) << "\n"; if( target == (addr+offt) ) { llvm::object::SymbolRef symref; symref = *rit->getSymbol(); llvm::StringRef strr; ec = symref.getName(strr); LASSERT(!ec, "Can't get name for symbol ref"); import_name = strr.str(); llvm::dbgs() << "Found symbol named: " << import_name << "\n"; ::uint64_t sym_addr; ec = symref.getAddress(sym_addr); if(ec) { llvm::dbgs() << "Could not get address of symbol: " << import_name << "\n"; } else { llvm::dbgs() << "Address for " << import_name << " is: " << to_string< ::uint64_t >(sym_addr, hex) << "\n"; } llvm::object::SymbolRef::Type symtype; ec = symref.getType(symtype); switch(symtype) { case llvm::object::SymbolRef::ST_Unknown: case llvm::object::SymbolRef::ST_Data: case llvm::object::SymbolRef::ST_Function: if( sym_addr == (::uint64_t)(-1) ) { return true; } else { llvm::dbgs() << "Skipping symbol due to address\n"; } break; default: llvm::dbgs() << "Skipping symbol since its probably not an import!" << "\n"; } } ++rit; } return false; }
/// Makes `Section` the current section of this disassembler.
///
/// Reads the section's contents, address and size. If any of those lookups
/// fails, the error message is passed to printError and the current section is
/// left untouched. Otherwise CurSection, CurSectionEnd (address + size) and
/// CurSectionMemory (a new StringRefMemoryObject over the contents) are
/// updated and the section name is reported through printInfo.
///
/// @param Section the object-file section to switch to
///
/// NOTE(review): whatever CurSectionMemory held before is not released here.
/// Confirm who owns the previous object when this is called more than once.
void Disassembler::setSection(const object::SectionRef Section) {
  StringRef Bytes;
  uint64_t SectAddr = 0, SectSize = 0;

  std::error_code ec = Section.getContents(Bytes);
  if (ec) {
    printError(ec.message());
    return;
  }

  ec = Section.getAddress(SectAddr);
  if (ec) {
    printError(ec.message());
    return;
  }

  ec = Section.getSize(SectSize);
  if (ec) {
    printError(ec.message());
    return;
  }

  CurSection = Section;
  CurSectionEnd = SectAddr + SectSize;
  CurSectionMemory = new StringRefMemoryObject(Bytes, SectAddr);

  StringRef SectionName;
  ec = CurSection.getName(SectionName);
  if (ec) {
    // The section is already selected; only the log line lacks a name.
    printError(ec.message());
  }
  // StringRef carries an explicit length and need not be NUL-terminated (or
  // even non-null), so build the string with str() rather than from data().
  printInfo("Setting Section " + SectionName.str());

  // TODO: Add section relocations (if necessary).
  // Make a list of all the relocations for this section.
  // error_code ec;
  // std::vector<object::RelocationRef> Rels;
  // for (relocation_iterator ri = Section.begin_relocations(), re =
  //      Section.end_relocations(); ri != re; ri.increment(ec)) {
  //   if (error(ec))
  //     break;
  //   Rels.push_back(*ri);
  // }
  // Sort relocations by address.
  // std::sort(Rels.begin(), Rels.end(), relocAddressLess);
  // std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
  // std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
}
/// Decompiles the machine function at `Address` into an IR Function in Mod.
///
/// Steps, in order:
///   1. Reject addresses outside the disassembler's current section.
///   2. Disassemble the address into a MachineFunction.
///   3. Get or insert a void() Function named after it. If that Function
///      already has a body, it is returned as is.
///   4. Create an "entry" block that branches to the first machine block, and
///      an IR block for every machine basic block.
///   5. Decompile each machine basic block.
///   6. For every block that is still empty (other than "entry" and "end"),
///      recover its address from its name and split the populated block that
///      contains that address, so the empty block receives the tail.
///   7. Run the type recovery pass over the function.
///
/// @param Address address of the function to decompile
/// @return the decompiled Function, or NULL if the address lies outside the
///         current section, the section bounds cannot be read, or
///         disassembly yields no MachineFunction
Function* Decompiler::decompileFunction(unsigned Address) {
  // Check that Address is inside the current section.
  // TODO: Find a better way to do this check. What we really care about is
  // avoiding reads to library calls and areas of memory we can't "see".
  const object::SectionRef Sect = Dis->getCurrentSection();
  uint64_t SectStart = 0, SectEnd = 0;
  if (Sect.getAddress(SectStart) || Sect.getSize(SectEnd)) {
    errs() << "Decompiler: Unable to read bounds of the current section!\n";
    return NULL;
  }
  SectEnd += SectStart;
  // SectEnd is one past the last byte of the section, so it is out of bounds.
  if (Address < SectStart || Address >= SectEnd) {
    // PRIx64 needs a 64-bit argument; Address is only `unsigned`.
    errs() << "Address out of bounds for section (is this a library call?): "
           << format("%1" PRIx64, static_cast<uint64_t>(Address)) << "\n";
    return NULL;
  }

  MachineFunction *MF = Dis->disassemble(Address);
  if (MF == NULL) {
    printError("Unable to disassemble function!");
    return NULL;
  }

  // Get Function Name
  // TODO: Determine Function Type
  FunctionType *FType = FunctionType::get(
      Type::getPrimitiveType(*Context, Type::VoidTyID), false);
  Function *F =
      cast<Function>(Mod->getOrInsertFunction(MF->getName(), FType));

  // A non-empty function has already been decompiled.
  if (!F->empty()) {
    return F;
  }

  // Create a basic block to hold entry point (alloca) information
  BasicBlock *entry = getOrCreateBasicBlock("entry", F);

  // Create an IR block for each machine basic block.
  MachineFunction::iterator BI = MF->begin(), BE = MF->end();
  while (BI != BE) {
    if (BI == MF->begin()) {
      // Add branch from "entry" to the first real block.
      entry->getInstList().push_back(
          BranchInst::Create(getOrCreateBasicBlock(BI->getName(), F)));
    } else {
      getOrCreateBasicBlock(BI->getName(), F);
    }
    ++BI;
  }

  BI = MF->begin();
  while (BI != BE) {
    if (decompileBasicBlock(BI, F) == NULL) {
      printError("Unable to decompile basic block!");
    }
    ++BI;
  }

  // During Decompilation, did any "in-between" basic blocks get created?
  // Nothing ever splits the entry block, so we skip it.
  for (Function::iterator I = ++F->begin(), E = F->end(); I != E; ++I) {
    if (!(I->empty())) {
      continue;
    }

    // Right now, the only way to get the right offset is to parse its name
    // it sucks, but it works.
    StringRef Name = I->getName();
    if (Name == "end" || Name == "entry") continue; // these can be empty

    // Everything after the function name plus one more character is parsed
    // as a decimal offset relative to Address.
    size_t Off = F->getName().size() + 1;
    StringRef BBAddrStr = Name.substr(Off);
    unsigned long long BBAddr = 0;
    // getAsUnsignedInteger returns true on failure and leaves BBAddr unset.
    if (getAsUnsignedInteger(BBAddrStr, 10, BBAddr)) {
      errs() << "Decompiler: Unable to parse block offset from name: "
             << Name << "\n";
      continue;
    }
    BBAddr += Address;

    DEBUG(errs() << "Split Target: " << Name << "\t Address: "
                 << BBAddr << "\n");

    // split Block at AddrStr
    Function::iterator SB;      // Split basic block
    BasicBlock::iterator SI, SE;    // Split instruction
    // Note the ++, nothing ever splits the entry block.
    for (SB = ++F->begin(); SB != E; ++SB) {
      // Empty or unterminated blocks have no instruction range to inspect.
      // Skip them before anything dereferences begin() or the terminator.
      if (SB->empty() || SB->getTerminator() == NULL) {
        continue;
      }

      DEBUG(outs() << "SB: " << SB->getName()
                   << "\tRange: " << Dis->getDebugOffset(SB->begin()->getDebugLoc())
                   << " " << Dis->getDebugOffset(SB->getTerminator()->getDebugLoc())
                   << "\n");

      if (BBAddr < getBasicBlockAddress(SB)
          || BBAddr > Dis->getDebugOffset(SB->getTerminator()->getDebugLoc())) {
        continue;
      }

      // Reorder instructions based on Debug Location
      sortBasicBlock(SB);
      DEBUG(errs() << "Found Split Block: " << SB->getName() << "\n");

      // Find iterator to split on.
      for (SI = SB->begin(), SE = SB->end(); SI != SE; ++SI) {
        // outs() << "SI: " << SI->getDebugLoc().getLine() << "\n";
        if (Dis->getDebugOffset(SI->getDebugLoc()) == BBAddr) break;
        if (Dis->getDebugOffset(SI->getDebugLoc()) > BBAddr) {
          // No instruction sits exactly at BBAddr. SI is left on the first
          // instruction past it, so the split below still happens there.
          errs() << "Could not find address inside basic block!\n"
                 << "SI: " << Dis->getDebugOffset(SI->getDebugLoc()) << "\n"
                 << "BBAddr: " << BBAddr << "\n";
          break;
        }
      }
      break;
    }

    // Check SB against the end first; SI and SE are only meaningful when a
    // candidate block was found.
    if (SB == E || !SB || SI == SE) {
      errs() << "Decompiler: Failed to find instruction offset in function!\n";
      continue;
    }

    // outs() << SB->getName() << " " << SI->getName() << "\n";
    // outs() << "Creating Block...";
    splitBasicBlockIntoBlock(SB, SI, I);
  }

  // Clean up unnecessary stores and loads
  FunctionPassManager FPM(Mod);
  // FPM.add(createPromoteMemoryToRegisterPass());
  // See Scalar.h for more.
  FPM.add(createTypeRecoveryPass());
  FPM.run(*F);

  return F;
}