int main(int argc, char *argv[]) { Diagnostics::initialize(); ::mlog = Diagnostics::Facility("tool", Diagnostics::destination); Diagnostics::mfacilities.insertAndAdjust(::mlog); // Parse the command-line Partitioner2::Engine engine; std::vector<std::string> specimenNames = parseCommandLine(argc, argv, engine); if (specimenNames.empty()) throw std::runtime_error("no specimen specified; see --help"); // Load specimen into memory MemoryMap map = engine.loadSpecimens(specimenNames); // Configure instruction semantics Partitioner2::Partitioner partitioner = engine.createPartitioner(); Disassembler *disassembler = engine.obtainDisassembler(); const RegisterDictionary *regdict = disassembler->get_registers(); if (disassembler->dispatcher() == NULL) throw std::runtime_error("no instruction semantics for this architecture"); BaseSemantics::RiscOperatorsPtr ops = InstructionSemantics2::ConcreteSemantics::RiscOperators::instance(regdict); BaseSemantics::DispatcherPtr cpu = disassembler->dispatcher()->create(ops); ConcreteSemantics::MemoryState::promote(ops->currentState()->memoryState())->memoryMap(map); // Find starting address rose_addr_t va = 0; if (settings.startVa) { va = *settings.startVa; } else if (engine.isaName() == "coldfire") { // Use the interrupt vector to initialize the stack pointer and instruction pointer. 
uint32_t sp, ip; if (4 != map.at(0).limit(4).read((uint8_t*)&sp).size()) throw std::runtime_error("cannot read stack pointer at address 0x00000000"); ops->writeRegister(disassembler->stackPointerRegister(), ops->number_(32, ByteOrder::be_to_host(sp))); if (4 != map.at(4).limit(4).read((uint8_t*)&ip).size()) throw std::runtime_error("cannot read instruction pointer at address 0x00000004"); va = ByteOrder::be_to_host(ip); } else if (!map.atOrAfter(0).require(MemoryMap::EXECUTABLE).next().assignTo(va)) { throw std::runtime_error("no starting address specified and none marked executable"); } ops->writeRegister(disassembler->instructionPointerRegister(), ops->number_(32, va)); // Execute map.dump(::mlog[INFO]); while (1) { va = ops->readRegister(disassembler->instructionPointerRegister())->get_number(); SgAsmInstruction *insn = partitioner.instructionProvider()[va]; SAWYER_MESG(::mlog[TRACE]) <<unparseInstructionWithAddress(insn, NULL, regdict) <<"\n"; try { cpu->processInstruction(insn); } catch (const BaseSemantics::Exception &e) { ::mlog[WARN] <<e <<"\n"; } } // std::cout <<"Final state:\n"; // std::cout <<*ops->currentState(); }
// Writes to standard output a listing of the addresses spanned by two address-to-function maps.
//
// Each row shows what lives at an address (the unparsed instruction, or a note that the address is not mapped, not
// executable, or holds a bad instruction), followed by the value each of the two maps stores for that address ("none"
// when a map has no entry).  Rows on which the two maps disagree are flagged with "<---".  A blank line is emitted
// whenever the listing skips over addresses.
static void
listInstructions(const InstructionProvider::Ptr &insns, const MemoryMap &map, const FunctionByAddress &code1,
                 FunctionByAddress &code2) {
    static const size_t insnWidth = 110;                // width of the instruction column
    std::ostream &out = std::cout;
    AsmUnparser unparser;

    // Begin at the lowest address covered by either map.
    const rose_addr_t lowest1 = code1.hull().least();
    const rose_addr_t lowest2 = code2.hull().least();
    rose_addr_t va = std::min(lowest1, lowest2);
    rose_addr_t expectedVa = va;

    // Keep going until the address has passed the end of both maps.
    while (!(va > code1.hull().greatest() && va > code2.hull().greatest())) {
        // A gap since the previous row gets a visual cue.
        if (expectedVa != va)
            out <<"\n";

        // Describe the content at this address and figure out how many bytes it occupies.
        std::ostringstream text;
        size_t nBytes = 1;                              // anything that is not an instruction is one byte
        const bool isExecutable = map.at(va).require(MemoryMap::EXECUTABLE).exists();
        SgAsmInstruction *insn = NULL;
        if (isExecutable)
            insn = (*insns)[va];
        if (insn != NULL) {
            unparser.unparse(text, insn);
            nBytes = insn->get_size();
        } else if (isExecutable) {
            text <<StringUtility::addrToString(va) <<": bad instruction";
        } else {
            text <<StringUtility::addrToString(va) <<": " <<(map.at(va).exists() ? "not executable" : "not mapped");
        }

        // Drop trailing empty lines, emit all but the last line verbatim, and pad the last line so that the
        // function columns line up.
        std::vector<std::string> rows = StringUtility::split('\n', text.str());
        while (!rows.empty() && rows.back() == "")
            rows.pop_back();
        if (!rows.empty()) {
            for (size_t idx = 0; idx + 1 < rows.size(); ++idx)
                out <<rows[idx] <<"\n";
            out <<std::setw(insnWidth) <<std::left <<rows.back();
        }

        // Values stored for this address in each map
        Sawyer::Optional<rose_addr_t> owner1 = code1.getOptional(va);
        Sawyer::Optional<rose_addr_t> owner2 = code2.getOptional(va);
        std::string label1("none");
        if (owner1)
            label1 = StringUtility::addrToString(*owner1);
        std::string label2("none");
        if (owner2)
            label2 = StringUtility::addrToString(*owner2);
        out <<"\t" <<std::setw(10) <<std::left <<label1
            <<"\t" <<std::setw(10) <<std::left <<label2;
        const char *marker = owner1.isEqual(owner2) ? "" : "<---";
        out <<" " <<marker <<"\n";

        // The next row starts right after this content unless an interval of either map begins sooner.
        rose_addr_t nextVa = va + nBytes;
        expectedVa = nextVa;
        FunctionByAddress::ConstIntervalIterator following1 = code1.upperBound(va);
        if (following1 != code1.nodes().end() && following1->least() < nextVa)
            nextVa = following1->least();
        FunctionByAddress::ConstIntervalIterator following2 = code2.upperBound(va);
        if (following2 != code2.nodes().end() && following2->least() < nextVa)
            nextVa = following2->least();

        // Skip ahead to the next mapped address; finished when there is none.
        const bool haveMore = map.atOrAfter(nextVa).next().assignTo(va);
        if (!haveMore)
            break;
    }
}
std::string MagicNumber::identify(const MemoryMap &map, rose_addr_t va) const { uint8_t buf[256]; size_t nBytes = map.at(va).limit(std::min(maxBytes_, sizeof buf)).read(buf).size(); if (0==nBytes) return "empty"; #ifdef ROSE_HAVE_LIBMAGIC return magic_buffer(details_->cookie, buf, nBytes); #elif defined(BOOST_WINDOWS) throw std::runtime_error("magic number identification is not supported on Microsoft Windows"); #elif BOOST_FILESYSTEM_VERSION == 2 throw std::runtime_error("MagicNumber::identify must have either libmagic or boost::filesystem version 3"); #else // We can maybe still do it, but this will be much, much slower. We copy some specimen memory into a temporary file, then // run the unix file(1) command on it, then delete the temp file. static int ncalls = 0; if (1 == ++ncalls) mlog[WARN] <<"libmagic is not available on this system; using slow method instead\n"; FileSystem::Path tmpFile = boost::filesystem::unique_path("/tmp/ROSE-%%%%-%%%%-%%%%-%%%%"); std::ofstream(tmpFile.c_str()).write((const char*)buf, nBytes); std::string cmd = "file " + tmpFile.string(); std::string magic; if (FILE *f = popen(cmd.c_str(), "r")) { char line[1024]; if (fgets(line, sizeof line, f)) magic = boost::trim_right_copy(std::string(line).substr(tmpFile.string().size()+2)); // filename + ": " pclose(f); } else { boost::filesystem::remove(tmpFile); throw std::runtime_error("command file: " + tmpFile.string()); } boost::filesystem::remove(tmpFile); return magic; #endif }
int main(int argc, char *argv[]) { // Parse command-line int argno=1; for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) { if (!strcmp(argv[argno], "--")) { ++argno; break; } else { std::cerr <<argv[0] <<": unrecognized switch: " <<argv[argno] <<"\n"; exit(1); } } if (argno+1!=argc) { std::cerr <<"usage: " <<argv[0] <<" [SWITCHES] [--] SPECIMEN\n"; exit(1); } std::string specimen_name = argv[argno++]; // Open the file rose_addr_t start_va = 0; MemoryMap map; size_t file_size = map.insertFile(specimen_name, start_va); map.at(start_va).limit(file_size).changeAccess(MemoryMap::EXECUTABLE, 0); // Try to disassemble every byte, and print the CALL/FARCALL targets InstructionMap insns; size_t nerrors=0; Disassembler *disassembler = new DisassemblerX86(4); for (rose_addr_t offset=0; offset<file_size; ++offset) { try { rose_addr_t insn_va = start_va + offset; if (SgAsmX86Instruction *insn = isSgAsmX86Instruction(disassembler->disassembleOne(&map, insn_va))) insns[insn_va] = insn; } catch (const Disassembler::Exception &e) { ++nerrors; } } // Partition those instructions into basic blocks and functions Partitioner partitioner; SgAsmBlock *gblock = partitioner.partition(NULL, insns, &map); // Print addresses of functions struct T1: AstSimpleProcessing { void visit(SgNode *node) { if (SgAsmFunction *func = isSgAsmFunction(node)) std::cout <<StringUtility::addrToString(func->get_entry_va()) <<"\n"; } }; T1().traverse(gblock, preorder); std::cerr <<specimen_name <<": " <<insns.size() <<" instructions; " <<nerrors <<" errors\n"; return 0; }
// class method rose_addr_t SRecord::load(const std::vector<SRecord> &srecs, MemoryMap &map, bool createSegments, unsigned accessPerms) { if (createSegments) { // We want to minimize the number of buffers in the map, so the first step is to discover what addresses are covered by // the data S-records Sawyer::Container::IntervalSet<AddressInterval> addressesUsed; BOOST_FOREACH (const SRecord &srec, srecs) { switch (srec.type()) { case SREC_DATA16: case SREC_DATA24: case SREC_DATA32: addressesUsed.insert(AddressInterval::baseSize(srec.address(), srec.data().size())); break; default: break; } } // Create buffers for the data and insert them into the memory map BOOST_FOREACH (const AddressInterval &interval, addressesUsed.intervals()) { ASSERT_forbid(interval.isWhole()); // not practically possible since S-Record file would be >2^65 bytes map.insert(interval, MemoryMap::Segment::anonymousInstance(interval.size(), accessPerms, "S-Records")); } } // Populate the map by writing the S-Record data into it. rose_addr_t startingAddr = 0; BOOST_FOREACH (const SRecord &srec, srecs) { switch (srec.type()) { case SREC_DATA16: case SREC_DATA24: case SREC_DATA32: { if (!srec.data().empty()) { size_t nwritten = map.at(srec.address()).write(srec.data()).size(); if (nwritten != srec.data().size()) throw MemoryMap::NotMapped("S-Record destination is not mapped for " + StringUtility::plural(srec.data().size(), "bytes"), &map, srec.address()); } break; } case SREC_START16: case SREC_START24: case SREC_START32: startingAddr = srec.address(); break; default: break; } } return startingAddr; }
int main(int argc, char *argv[]) { // Parse command-line int argno=1; for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) { if (!strcmp(argv[argno], "--")) { ++argno; break; } else { std::cerr <<argv[0] <<": unrecognized switch: " <<argv[argno] <<"\n"; exit(1); } } if (argno+1!=argc) { std::cerr <<"usage: " <<argv[0] <<" [SWITCHES] [--] SPECIMEN\n"; exit(1); } std::string specimen_name = argv[argno++]; // Open the file rose_addr_t start_va = 0; MemoryMap map; size_t file_size = map.insertFile(specimen_name, start_va); map.at(start_va).limit(file_size).changeAccess(MemoryMap::EXECUTABLE, 0); // Try to disassemble every byte, and print the CALL/FARCALL targets size_t ninsns=0, nerrors=0; Disassembler *disassembler = new DisassemblerX86(4); for (rose_addr_t offset=0; offset<file_size; ++offset) { try { rose_addr_t insn_va = start_va + offset; SgAsmX86Instruction *insn = isSgAsmX86Instruction(disassembler->disassembleOne(&map, insn_va)); if (insn && (x86_call==insn->get_kind() || x86_farcall==insn->get_kind())) { ++ninsns; rose_addr_t target_va; if (insn->getBranchTarget(&target_va)) std::cout <<StringUtility::addrToString(insn_va) <<": " <<StringUtility::addrToString(target_va) <<"\n"; } } catch (const Disassembler::Exception &e) { ++nerrors; } } std::cerr <<specimen_name <<": " <<ninsns <<" instructions; " <<nerrors <<" errors\n"; return 0; }
// Entry point.  The visible part of this function parses the command line, reads a file of instruction addresses,
// starts the specimen under a debugger, obtains a memory map for it, and verifies that every address from the file is
// mapped with execute permission.
//
// The positional arguments returned by parseCommandLine() are the address file name followed by the specimen command
// and its arguments, so at least two are required.
//
// NOTE(review): this excerpt ends before the closing brace of main(); the rest of the function is not shown here.
int main(int argc, char *argv[]) {
    ROSE_INITIALIZE;
    Diagnostics::initAndRegister(mlog, "tool");
    Sawyer::ProgressBarSettings::minimumUpdateInterval(0.2); // more fluid spinner

    // Parse command-line
    P2::Engine engine;
    Settings settings;
    std::vector<std::string> args = parseCommandLine(argc, argv, engine, settings);
    ASSERT_always_require2(args.size() >= 2, "incorrect usage; see --help");

    // Parse file containing instruction addresses
    std::string addrFileName = args[0];
    std::set<rose_addr_t> knownVas = parseAddressFile(addrFileName);
    mlog[INFO] <<"parsed " <<plural(knownVas.size(), "unique addresses") <<"\n";

    // Load specimen natively and attach debugger
    // Everything after the address file name is the command used to run the specimen.  The breakpoint covers the
    // whole address space.
    std::vector<std::string> specimen_cmd(args.begin()+1, args.end());
    BinaryDebugger debugger(specimen_cmd);
    debugger.setBreakpoint(AddressInterval::whole());
    ASSERT_always_require(debugger.isAttached());
    ASSERT_always_forbid(debugger.isTerminated());
    // presumably isAttached() returns the child's process ID when attached -- TODO confirm against BinaryDebugger
    pid_t pid = debugger.isAttached();
    mlog[INFO] <<"child PID " <<pid <<"\n";

    // Get memory map.
    // Depending on settings.mapSource the map is either built by the engine from the specimen executable or taken
    // from the running process identified by its PID.
    MemoryMap map;
    if (MAP_ROSE==settings.mapSource) {
        map = engine.loadSpecimens(specimen_cmd[0]);
    } else {
        map.insertProcess(":noattach:" + numberToString(pid));
    }
    map.dump(mlog[INFO]);

    // The addresses specified in the instruction address file must all be in memory that is mapped.
    // The check below additionally requires execute permission at each address.
    BOOST_FOREACH (rose_addr_t va, knownVas) {
        ASSERT_always_require2(map.at(va).require(MemoryMap::EXECUTABLE).exists(),
                               "given address " + addrToString(va) + " is not mapped or lacks execute permission");
    }
int main(int argc, char *argv[]) { Diagnostics::initialize(); BinaryAnalysis::Partitioner2::Engine engine; Settings settings; std::vector<std::string> specimenNames = parseCommandLine(argc, argv, engine, settings /*in,out*/); BinaryAnalysis::MagicNumber analyzer; analyzer.maxBytesToCheck(settings.maxBytes); MemoryMap map = engine.loadSpecimens(specimenNames); map.dump(mlog[INFO]); size_t step = std::max(size_t(1), settings.step); AddressInterval limits = settings.limits.isEmpty() ? map.hull() : (settings.limits & map.hull()); Sawyer::Container::IntervalSet<AddressInterval> addresses(map); addresses.intersect(limits); size_t nPositions = addresses.size() / step; mlog[INFO] <<"approximately " <<StringUtility::plural(nPositions, "positions") <<" to check\n"; { Sawyer::ProgressBar<size_t> progress(nPositions, mlog[INFO], "positions"); for (rose_addr_t va=limits.least(); va<=limits.greatest() && map.atOrAfter(va).next().assignTo(va); va+=step, ++progress) { std::string magicString = analyzer.identify(map, va); if (magicString!="data") { // runs home to Momma when it gets confused uint8_t buf[8]; size_t nBytes = map.at(va).limit(sizeof buf).read(buf).size(); std::cout <<StringUtility::addrToString(va) <<" |" <<leadingBytes(buf, nBytes) <<" | " <<magicString <<"\n"; } if (va==limits.greatest()) break; // prevent overflow at top of address space } } }
// class method size_t SRecord::dump(const MemoryMap &map, std::ostream &out, size_t addrSize) { ASSERT_require(2==addrSize || 3==addrSize || 4==addrSize); SRecord::Type type = SREC_NONE; switch (addrSize) { case 2: type = SREC_DATA16; break; case 3: type = SREC_DATA24; break; case 4: type = SREC_DATA32; break; } size_t nRecords = 0; rose_addr_t va = 0; static const size_t maxBytesPerRecord = 28; // common value so each S-Record fits on an 80-character screen uint8_t buffer[maxBytesPerRecord]; while (map.atOrAfter(va).next().assignTo(va)) { size_t nread = map.at(va).limit(maxBytesPerRecord).read(buffer).size(); ASSERT_require(nread>0); // since map.next() returned true SRecord srec(type, va, buffer, nread); out <<srec <<"\n"; va += nread; ++nRecords; } return nRecords; }
static inline bool isGoodAddr(const std::set<rose_addr_t> &goodVas, const MemoryMap &map, rose_addr_t va) { return !map.at(va).exists() || goodVas.find(va)!=goodVas.end(); }
/* Looks at the RVA/Size pairs in the PE header and creates an SgAsmGenericSection object for each one. This must be done * after we build the mapping from virtual addresses to file offsets. */ void SgAsmPEFileHeader::create_table_sections() { /* First, only create the sections. */ for (size_t i=0; i<p_rvasize_pairs->get_pairs().size(); i++) { SgAsmPERVASizePair *pair = p_rvasize_pairs->get_pairs()[i]; if (0==pair->get_e_size()) continue; /* Table names come from PE file specification and are hard coded by RVA/Size pair index */ const char *tabname_short; std::string tabname = rvasize_pair_name((PairPurpose)i, &tabname_short); /* Find the starting offset in the file. * FIXME: We have a potential problem here in that ROSE sections are always contiguous in the file but a section created * from an RVA/Size pair is not necessarily contiguous in the file. Normally such sections are in fact * contiguous and we'll just ignore this for now. In any case, as long as these sections only ever read their * data via the same MemoryMap that we use here, everything should be fine. [RPM 2009-08-17] */ rose_addr_t pair_va = get_base_va() + pair->get_e_rva(); MemoryMap *map = get_loader_map(); ROSE_ASSERT(map!=NULL); if (!map->exists(Extent(pair_va, pair->get_e_size()))) { fprintf(stderr, "SgAsmPEFileHeader::create_table_sections: warning: pair-%zu, rva=0x%08"PRIx64", size=%"PRIu64 " bytes \"%s\": unable to find a mapping for the virtual address (skipping)\n", i, pair->get_e_rva().get_rva(), pair->get_e_size(), tabname.c_str()); continue; } std::pair<Extent, MemoryMap::Segment> me = map->at(pair_va); rose_addr_t file_offset = me.second.get_buffer_offset(me.first, pair_va); /* Create the new section */ SgAsmGenericSection *tabsec = NULL; switch (i) { case 0: { /* Sometimes export sections are represented by a ".edata" section, and sometimes they're represented by an * RVA/Size pair, and sometimes both point to the same part of the file. 
We don't want the exports duplicated * in the AST, so we only create this table as exports if we haven't already seen some other export section. */ SgAsmGenericSectionPtrList §ions = get_sections()->get_sections(); bool seen_exports = false; for (SgAsmGenericSectionPtrList::iterator si=sections.begin(); !seen_exports && si!=sections.end(); ++si) seen_exports = isSgAsmPEExportSection(*si); if (seen_exports) { tabsec = new SgAsmGenericSection(get_file(), this); } else { tabsec = new SgAsmPEExportSection(this); } break; } case 1: { /* Sometimes import sections are represented by a ".idata" section, and sometimes they're represented by an * RVA/Size pair, and sometimes both point to the same part of the file. We don't want the imports duplicated * in the AST, so we only create this table as imports if we haven't already seen some other import section. */ SgAsmGenericSectionPtrList §ions = get_sections()->get_sections(); bool seen_imports = false; for (SgAsmGenericSectionPtrList::iterator si=sections.begin(); !seen_imports && si!=sections.end(); ++si) seen_imports = isSgAsmPEImportSection(*si); if (seen_imports) { tabsec = new SgAsmGenericSection(get_file(), this); } else { tabsec = new SgAsmPEImportSection(this); } break; } default: { tabsec = new SgAsmGenericSection(get_file(), this); break; } } tabsec->set_name(new SgAsmBasicString(tabname)); tabsec->set_short_name(tabname_short); tabsec->set_synthesized(true); tabsec->set_purpose(SP_HEADER); tabsec->set_offset(file_offset); tabsec->set_size(pair->get_e_size()); tabsec->set_file_alignment(1); tabsec->set_mapped_alignment(1); tabsec->set_mapped_preferred_rva(pair->get_e_rva().get_rva()); tabsec->set_mapped_actual_va(pair->get_e_rva().get_rva()+get_base_va()); /*FIXME: not sure this is correct. 
[RPM 2009-09-11]*/ tabsec->set_mapped_size(pair->get_e_size()); tabsec->set_mapped_rperm(true); tabsec->set_mapped_wperm(false); tabsec->set_mapped_xperm(false); pair->set_section(tabsec); pair->set_e_rva(pair->get_e_rva().set_section(tabsec)); } /* Now parse the sections */ for (size_t i=0; i<p_rvasize_pairs->get_pairs().size(); i++) { SgAsmPERVASizePair *pair = p_rvasize_pairs->get_pairs()[i]; SgAsmGenericSection *tabsec = pair->get_section(); if (tabsec) tabsec->parse(); } }
// Read a string from memory std::string StringFinder::decode(const MemoryMap &map, const String &string) const { ASSERT_require(string.isValid()); struct Resources { uint8_t *buffer; Resources(): buffer(NULL) {} ~Resources() { delete buffer; } } r; // Read the data for the string r.buffer = new uint8_t[string.nBytes()]; size_t nRead = map.at(string.address()).limit(string.nBytes()).read(r.buffer).size(); if (nRead < string.nBytes()) { throw MemoryMap::NotMapped("short read for " + StringUtility::numberToString(string.nBytes()) + "-byte string at " + StringUtility::addrToString(string.address()), &map, string.address() + nRead); } // Decode the string length uint8_t *data = r.buffer; size_t dataSize = string.nBytes(); ASSERT_require(string.isValid()); // checks string length for encoding switch (string.lengthEncoding()) { case MAP_TERMINATED: case SEQUENCE_TERMINATED: break; case NUL_TERMINATED: --dataSize; break; case BYTE_LENGTH: { size_t n = *data++; --dataSize; ASSERT_require2(n == dataSize, "mismatched lengths in byte-length encoded string"); break; } case LE16_LENGTH: { size_t n = ByteOrder::le_to_host(*(uint16_t*)data); data += 2; dataSize -= 2; ASSERT_require2(n == dataSize, "mismatched lengths in le16-length encoded string"); break; } case BE16_LENGTH: { size_t n = ByteOrder::be_to_host(*(uint16_t*)data); data += 2; dataSize -= 2; ASSERT_require2(n == dataSize, "mismatched lengths in be16-length encoded string"); break; } case LE32_LENGTH: { size_t n = ByteOrder::le_to_host(*(uint32_t*)data); data += 4; dataSize -= 4; ASSERT_require2(n == dataSize, "mismatched lengths in le32-length encoded string"); break; } case BE32_LENGTH: { size_t n = ByteOrder::be_to_host(*(uint32_t*)data); data += 4; dataSize -= 4; ASSERT_require2(n == dataSize, "mismatched lengths in be32-length encoded string"); break; } } // Decode the string std::string s; switch (string.characterEncoding()) { case ASCII: s = std::string((const char*)data, dataSize); break; } return s; }