/** Returns the parts of this section that have been referenced.
 *
 *  The file-wide set of referenced extents is obtained from the containing file and each extent is clipped to the part
 *  of the file occupied by this section (get_offset() through get_offset()+get_size()-1).  Offsets in the returned set
 *  are relative to the start of the section.  A section whose size is zero yields an empty set. */
AddressIntervalSet
SgAsmGenericSection::get_referenced_extents() const
{
    AddressIntervalSet sectionExtents;
    if (0 == get_size())
        return sectionExtents;

    // Part of the file occupied by this section, expressed as file offsets.
    const AddressInterval sectionInFile = AddressInterval::baseSize(get_offset(), get_size());

    const AddressIntervalSet &referencedInFile = get_file()->get_referenced_extents();
    BOOST_FOREACH (const AddressInterval &fileExtent, referencedInFile.intervals()) {
        // Extent lies completely inside the section: translate it to a section-relative offset.
        if (sectionInFile.isContaining(fileExtent)) {
            sectionExtents.insert(AddressInterval::baseSize(fileExtent.least() - get_offset(), fileExtent.size()));
            continue;
        }

        // Extent and section do not overlap at all.
        if (fileExtent.isLeftOf(sectionInFile) || fileExtent.isRightOf(sectionInFile))
            continue;

        // Extent covers the whole section; nothing more can be added, so stop looking.
        if (fileExtent.isContaining(sectionInFile)) {
            sectionExtents.insert(AddressInterval::baseSize(0, get_size()));
            break;
        }

        // Extent starts before the section and ends inside it.
        if (fileExtent.least() < sectionInFile.least()) {
            sectionExtents.insert(AddressInterval::baseSize(0, fileExtent.least() + fileExtent.size() - get_offset()));
            continue;
        }

        // Extent starts inside the section and ends after it.
        if (fileExtent.greatest() > sectionInFile.greatest()) {
            sectionExtents.insert(AddressInterval::baseSize(fileExtent.least() - get_offset(),
                                                            get_offset() + get_size() - fileExtent.least()));
            continue;
        }

        ASSERT_not_reachable("invalid extent overlap category");
    }
    return sectionExtents;
}
/** Finds the lowest address in @p limits whose byte value is any one of @p bytesToFind.
 *
 *  Only addresses that satisfy @p requiredPerms and @p prohibitedPerms are examined.  Returns the address of the first
 *  matching byte, or nothing if @p limits is empty, @p bytesToFind is empty, or no byte within @p limits matches.
 *
 *  @param limits          Addresses to search; no byte outside this interval is ever examined or returned.
 *  @param bytesToFind     Set of byte values, any of which counts as a match.
 *  @param requiredPerms   Permission bits that must be set for an address to be searched.
 *  @param prohibitedPerms Permission bits that must be clear for an address to be searched. */
Sawyer::Optional<rose_addr_t>
MemoryMap::findAny(const AddressInterval &limits, const std::vector<uint8_t> &bytesToFind,
                   unsigned requiredPerms, unsigned prohibitedPerms) const {
    if (!limits || bytesToFind.empty())
        return Sawyer::Nothing();

    // Read a bunch of bytes at a time. If the buffer size is large then we'll have fewer read calls before finding a match,
    // which is good if a match is unlikely. But if a match is likely, then it's better to use a smaller buffer so we don't
    // read more than necessary to find a match. We'll compromise by starting with a small buffer that grows up to some
    // limit.
    size_t bufsize = 8;                                 // current read size; doubles up to sizeof buffer
    uint8_t buffer[4096];                               // full buffer

    Sawyer::Optional<rose_addr_t> atVa = this->at(limits.least()).require(requiredPerms).prohibit(prohibitedPerms).next();
    while (atVa && *atVa <= limits.greatest()) {
        // Clamp the read so it never extends past limits.greatest().  The bound is recomputed from the current address
        // because unmapped gaps may have been skipped; a running byte count would overestimate what is left.  The
        // subtraction cannot overflow since *atVa <= limits.greatest().
        const rose_addr_t lastOffset = limits.greatest() - *atVa;
        size_t nToRead = bufsize;
        if (lastOffset < nToRead)
            nToRead = static_cast<size_t>(lastOffset) + 1;

        size_t nread = at(*atVa).limit(nToRead).require(requiredPerms).prohibit(prohibitedPerms).read(buffer).size();
        assert(nread > 0);                              // because of the next() calls

        for (size_t offset=0; offset<nread; ++offset) {
            if (std::find(bytesToFind.begin(), bytesToFind.end(), buffer[offset]) != bytesToFind.end())
                return *atVa + offset;                  // found
        }

        // Stop once the last permitted address has been examined.  This also avoids wrapping around to address zero
        // when the limits extend to the top of the address space.
        const rose_addr_t lastRead = *atVa + (nread - 1);
        if (lastRead >= limits.greatest())
            break;

        atVa = at(lastRead + 1).require(requiredPerms).prohibit(prohibitedPerms).next();
        bufsize = std::min(2*bufsize, sizeof buffer);   // use a larger buffer next time if possible
    }

    return Sawyer::Nothing();
}
// Visits every address of `interval`, one byte at a time.  Returns false as soon as a byte is found for which
// self->isAsciiCharacter() is false; otherwise counts the byte in nBytes and keeps going.  Returns true when every byte
// of the interval passed the test.
bool operator()(const MemoryMap::Super &map, const AddressInterval &interval) {
    for (rose_addr_t va = interval.least(); va <= interval.greatest(); ++va) {
        uint8_t byte;
        map.at(va).limit(1).read(&byte);
        if (!self->isAsciiCharacter(byte))
            return false;
        ++nBytes;

        // Leave before the increment when the last address has been handled, otherwise ++va could wrap around.
        if (va == interval.greatest())
            break;
    }
    return true;
}
/** Searches for a sequence of bytes lying within an address interval.
 *
 *  Returns the lowest address at which @p sequence occurs such that the whole occurrence lies inside @p interval.
 *  Returns nothing if @p interval is empty or no such occurrence exists.  An empty @p sequence matches at the start of
 *  the interval.  Sequences longer than the internal 4096-byte buffer are not supported (asserted).
 *
 *  @param interval Addresses to search.
 *  @param sequence Bytes that must appear consecutively. */
Sawyer::Optional<rose_addr_t>
MemoryMap::findSequence(const AddressInterval &interval, const std::vector<uint8_t> &sequence) const {
    if (interval.isEmpty())
        return Sawyer::Nothing();
    if (sequence.empty())
        return interval.least();

    std::vector<uint8_t> buffer(4096);                  // size is arbitrary
    ASSERT_require2(sequence.size() <= buffer.size(), "long sequences not implemented yet");

    rose_addr_t searchVa = interval.least();
    while (AddressInterval window = atOrAfter(searchVa).read(buffer)) {
        // atOrAfter may skip ahead over unmapped addresses; stop if that took us beyond the search interval.
        if (window.least() > interval.greatest())
            break;

        // Only the part of the window that lies inside the search interval may be examined.  The subtraction cannot
        // overflow because window.least() <= interval.greatest().
        size_t nUsable = window.size();
        const rose_addr_t lastOffset = interval.greatest() - window.least();
        if (lastOffset < nUsable)
            nUsable = static_cast<size_t>(lastOffset) + 1;

        for (size_t offset=0; offset+sequence.size()<=nUsable; ++offset) {
            if (std::equal(sequence.begin(), sequence.end(), &buffer[offset]))
                return window.least() + offset;
        }

        // Done once the end of the search interval has been reached.  Since interval.greatest() is a valid address,
        // this test also guarantees that window.greatest()+1 below cannot overflow.
        if (window.greatest() >= interval.greatest())
            break;

        if (window.size()==buffer.size()) {
            // The window was cut short by the buffer, so a match might straddle its end.  Resume at the first start
            // position that was not tested: back up by sequence.size()-1 bytes from the address following the window.
            // Backing up by the buffer size instead would advance only one byte per read.
            searchVa = window.greatest() + 1 - (sequence.size() - 1);
        } else {
            // The window ended because the readable data ended; continue with whatever comes after it.
            searchVa = window.greatest() + 1;
        }
    }
    return Sawyer::Nothing();
}
// Visits every address of `interval`, one byte at a time, tracking a run of consecutive bytes for which
// self->isAsciiCharacter() is true.  The run is described by startVa (address of its first byte) and nChars (its
// length); it carries over from a previous call only when this interval begins exactly where the run left off.
// Returns false as soon as the run reaches minChars bytes, and true when the interval is exhausted first.
bool operator()(const MemoryMap::Super &map, const AddressInterval &interval) {
    // A run from an earlier call is discarded unless this interval is its direct continuation.
    const bool continuesRun = (startVa + nChars == interval.least());
    if (!continuesRun)
        nChars = 0;

    for (rose_addr_t va = interval.least(); va <= interval.greatest(); ++va) {
        uint8_t byte;
        map.at(va).limit(1).read(&byte);

        if (!self->isAsciiCharacter(byte)) {
            nChars = 0;                                 // run is broken
        } else {
            ++nChars;
            if (1 == nChars)
                startVa = va;                           // first byte of a new run
            if (nChars >= minChars)
                return false;                           // run is long enough
        }

        // Leave before the increment when the last address has been handled, otherwise ++va could wrap around.
        if (va == interval.greatest())
            break;
    }
    return true;
}
int main(int argc, char *argv[]) { ROSE_INITIALIZE; BinaryAnalysis::Partitioner2::Engine engine; Settings settings; std::vector<std::string> specimenNames = parseCommandLine(argc, argv, engine, settings /*in,out*/); BinaryAnalysis::MagicNumber analyzer; analyzer.maxBytesToCheck(settings.maxBytes); MemoryMap::Ptr map = engine.loadSpecimens(specimenNames); map->dump(mlog[INFO]); size_t step = std::max(size_t(1), settings.step); AddressInterval limits = settings.limits.isEmpty() ? map->hull() : (settings.limits & map->hull()); Sawyer::Container::IntervalSet<AddressInterval> addresses(*map); addresses.intersect(limits); size_t nPositions = addresses.size() / step; mlog[INFO] <<"approximately " <<StringUtility::plural(nPositions, "positions") <<" to check\n"; { Sawyer::ProgressBar<size_t> progress(nPositions, mlog[INFO], "positions"); for (rose_addr_t va=limits.least(); va<=limits.greatest() && map->atOrAfter(va).next().assignTo(va); va+=step, ++progress) { std::string magicString = analyzer.identify(map, va); if (magicString!="data") { // runs home to Momma when it gets confused uint8_t buf[8]; size_t nBytes = map->at(va).limit(sizeof buf).read(buf).size(); std::cout <<StringUtility::addrToString(va) <<" |" <<leadingBytes(buf, nBytes) <<" | " <<magicString <<"\n"; } if (va==limits.greatest()) break; // prevent overflow at top of address space } } }
void MemoryMap::eraseZeros(size_t minsize) { if (isEmpty()) return; unsigned permissions = READABLE | EXECUTABLE; // access permissions that must be present AddressIntervalSet toRemove; // to save up intervals until we're done iterating AddressInterval zeroInterval; uint8_t buf[8192]; rose_addr_t va = hull().least(); while (AddressInterval accessed = atOrAfter(va).require(permissions).limit(sizeof buf).read(buf)) { for (size_t offset=0; offset<accessed.size(); ++offset) { if (0 == buf[offset]) { if (zeroInterval.isEmpty()) { zeroInterval = AddressInterval(accessed.least()+offset); } else if (zeroInterval.greatest()+1 < offset) { if (zeroInterval.size() >= minsize) toRemove.insert(zeroInterval); zeroInterval = AddressInterval(accessed.least()+offset); } else { zeroInterval = AddressInterval::hull(zeroInterval.least(), zeroInterval.greatest()+1); } } else if (!zeroInterval.isEmpty()) { if (zeroInterval.size() >= minsize) toRemove.insert(zeroInterval); zeroInterval = AddressInterval(); } } if (accessed.greatest() == hull().greatest()) break; // prevent overflow in next statement va += accessed.size(); } if (zeroInterval.size() >= minsize) toRemove.insert(zeroInterval); BOOST_FOREACH (const AddressInterval &interval, toRemove.intervals()) erase(interval); }
// Converts an AddressInterval to an Extent.  An empty interval becomes a default-constructed Extent; any other interval
// becomes the Extent that Extent::inin() builds from the interval's least and greatest addresses.
Extent
toExtent(const AddressInterval &x) {
    if (x.isEmpty())
        return Extent();
    return Extent::inin(x.least(), x.greatest());
}
// FIXME[Robb P. Matzke 2014-10-09]: No idea how to do this in Microsoft Windows! void MemoryMap::insertProcess(const std::string &locatorString) { #ifdef BOOST_WINDOWS // FIXME[Robb P. Matzke 2014-10-10] throw std::runtime_error("MemoryMap::insertProcess is not available on Microsoft Windows"); #else // Resources that need to be cleaned up on return or exception struct T { FILE *mapsFile; // file for /proc/xxx/maps char *buf; // line read from /proc/xxx/maps size_t bufsz; // bytes allocated for "buf" int memFile; // file for /proc/xxx/mem pid_t resumeProcess; // subordinate process to resume T(): mapsFile(NULL), buf(NULL), bufsz(0), memFile(-1), resumeProcess(-1) {} ~T() { if (mapsFile) fclose(mapsFile); if (buf) free(buf); if (memFile>=0) close(memFile); if (resumeProcess != -1) ptrace(PTRACE_DETACH, resumeProcess, 0, 0); } } local; // Parse the locator string. bool doAttach = true; const char *s = locatorString.c_str(); if (':'!=*s++) throw insertProcessError(locatorString, "initial colon expected"); while (':'!=*s) { if (boost::starts_with(s, "noattach")) { doAttach = false; s += strlen("noattach"); } else { throw insertProcessError(locatorString, "unknown option beginning at ...\"" + std::string(s) + "\""); } if (','==*s) ++s; } if (':'!=*s++) throw insertProcessError(locatorString, "second colon expected"); int pid = parseInteger(locatorString, s /*in,out*/, "process ID expected"); // We need to attach to the process with ptrace before we can read from its /proc/xxx/mem file. We'll have // to detach if anything goes wrong or when we finish. 
if (doAttach) { if (-1 == ptrace(PTRACE_ATTACH, pid, 0, 0)) throw insertProcessError(locatorString, "cannot attach: " + std::string(strerror(errno))); int wstat = 0; if (-1 == waitpid(pid, &wstat, 0)) throw insertProcessError(locatorString, "cannot wait: " + std::string(strerror(errno))); if (WIFEXITED(wstat)) throw insertProcessError(locatorString, "process exited before it could be read"); if (WIFSIGNALED(wstat)) throw insertProcessError(locatorString, "process died with " + boost::to_lower_copy(std::string(strsignal(WTERMSIG(wstat)))) + " before it could be read"); local.resumeProcess = pid; ASSERT_require2(WIFSTOPPED(wstat) && WSTOPSIG(wstat)==SIGSTOP, "subordinate process did not stop"); } // Prepare to read subordinate's memory std::string mapsName = "/proc/" + StringUtility::numberToString(pid) + "/maps"; if (NULL==(local.mapsFile = fopen(mapsName.c_str(), "r"))) throw insertProcessError(locatorString, "cannot open " + mapsName + ": " + strerror(errno)); std::string memName = "/proc/" + StringUtility::numberToString(pid) + "/mem"; if (-1 == (local.memFile = open(memName.c_str(), O_RDONLY))) throw insertProcessError(locatorString, "cannot open " + memName + ": " + strerror(errno)); // Read each line from the /proc/xxx/maps to figure out what memory is mapped in the subordinate process. The format for // the part we're interested in is /^([0-9a-f]+)-([0-9a-f]+) ([-r][-w][-x])/ where $1 is the inclusive starting address, $2 // is the exclusive ending address, and $3 are the permissions. 
int mapsFileLineNumber = 0; while (rose_getline(&local.buf, &local.bufsz, local.mapsFile)>0) { ++mapsFileLineNumber; // Begin address char *s=local.buf, *rest=s; errno = 0; rose_addr_t begin = rose_strtoull(s, &rest, 16); if (errno!=0 || rest==s || '-'!=*rest) { throw insertProcessError(locatorString, mapsName + " syntax error for beginning address at line " + StringUtility::numberToString(mapsFileLineNumber) + ": " + local.buf); } // End address s = rest+1; rose_addr_t end = rose_strtoull(s, &rest, 16); if (errno!=0 || rest==s || ' '!=*rest) { throw insertProcessError(locatorString, mapsName + " syntax error for ending address at line " + StringUtility::numberToString(mapsFileLineNumber) + ": " + local.buf); } if (begin >= end) { throw insertProcessError(locatorString, mapsName + " invalid address range at line " + StringUtility::numberToString(mapsFileLineNumber) + ": " + local.buf); } // Access permissions s = ++rest; if ((s[0]!='r' && s[0]!='-') || (s[1]!='w' && s[1]!='-') || (s[2]!='x' && s[2]!='-')) { throw insertProcessError(locatorString, mapsName + " invalid access permissions at line " + StringUtility::numberToString(mapsFileLineNumber) + ": " + local.buf); } unsigned accessibility = ('r'==s[0] ? READABLE : 0) | ('w'==s[1] ? WRITABLE : 0) | ('x'==s[2] ? 
EXECUTABLE : 0); // Skip over unused fields for (size_t nSpaces=0; nSpaces<4 && *s; ++s) { if (isspace(*s)) ++nSpaces; } while (isspace(*s)) ++s; // Segment name according to the kernel std::string kernelSegmentName; while (*s && !isspace(*s)) kernelSegmentName += *s++; // Create memory segment, but don't insert it until after we read all the data std::string segmentName = "proc:" + StringUtility::numberToString(pid); AddressInterval segmentInterval = AddressInterval::baseSize(begin, end-begin); Segment segment = Segment::anonymousInstance(segmentInterval.size(), accessibility, segmentName + "(" + kernelSegmentName + ")"); // Copy data from the subordinate process into our memory segment if (-1 == lseek(local.memFile, begin, SEEK_SET)) throw insertProcessError(locatorString, memName + " seek failed: " + strerror(errno)); size_t nRemain = segmentInterval.size(); rose_addr_t segmentBufferOffset = 0; while (nRemain > 0) { uint8_t chunkBuf[8192]; size_t chunkSize = std::min(nRemain, sizeof chunkBuf); ssize_t nRead = ::read(local.memFile, chunkBuf, chunkSize); if (-1==nRead) { if (EINTR==errno) continue; //mlog[WARN] <<strerror(errno) <<" during read from " <<memName <<" for segment " <<kernelSegmentName // <<" at " <<segmentInterval <<"\n"; segmentName += "[" + boost::to_lower_copy(std::string(strerror(errno))) + "]"; break; } else if (0==nRead) { //mlog[WARN] <<"short read from " <<memName <<" for segment " <<kernelSegmentName <<" at " <<segmentInterval <<"\n"; segmentName += "[short read]"; break; } rose_addr_t nWrite = segment.buffer()->write(chunkBuf, segmentBufferOffset, nRead); ASSERT_always_require(nWrite == (rose_addr_t)nRead); nRemain -= chunkSize; segmentBufferOffset += chunkSize; } if (nRemain > 0) { // If a read failed, map only what we could read segmentInterval = AddressInterval::baseSize(segmentInterval.least(), segmentInterval.size()-nRemain); } // Insert segment into memory map if (!segmentInterval.isEmpty()) insert(segmentInterval, segment); } #endif }
// Insert file from a locator string of the form: // :[VA][+VSIZE][=PERMS]:[OFFSET][+FSIZE]:FILENAME AddressInterval MemoryMap::insertFile(const std::string &locatorString) { //-------------------------------------- // Parse the parts of the locator string //-------------------------------------- // Leading colon const char *s = locatorString.c_str(); if (':'!=*s++) throw insertFileError(locatorString, "not a locator string"); // Virtual address Sawyer::Optional<rose_addr_t> optionalVa; if (isdigit(*s)) optionalVa = parseInteger(locatorString, s /*in,out*/, "virtual address expected"); // Virtual size Sawyer::Optional<size_t> optionalVSize; if ('+'==*s) { ++s; optionalVSize = parseInteger(locatorString, s /*in,out*/, "virtual size expected"); } // Virtual accessibility Sawyer::Optional<unsigned> optionalAccess; if ('='==*s) { ++s; unsigned a = 0; if ('r'==*s) { ++s; a |= READABLE; } if ('w'==*s) { ++s; a |= WRITABLE; } if ('x'==*s) { ++s; a |= EXECUTABLE; } optionalAccess = a; } // Second colon if (':'!=*s) { if (*s && optionalAccess) throw insertFileError(locatorString, "invalid access spec"); throw insertFileError(locatorString, "syntax error before second colon"); } ++s; // File offset Sawyer::Optional<size_t> optionalOffset; if (isdigit(*s)) optionalOffset = parseInteger(locatorString, s /*in,out*/, "file offset expected"); // File size Sawyer::Optional<size_t> optionalFSize; if ('+'==*s) { ++s; optionalFSize = parseInteger(locatorString, s /*in,out*/, "file size expected"); } // Third colon if (':'!=*s) throw insertFileError(locatorString, "syntax error before third colon"); ++s; // File name if (!*s) throw insertFileError(locatorString, "file name expected after third colon"); std::string fileName = s; if (fileName.size()!=strlen(fileName.c_str())) throw insertFileError(locatorString, "invalid file name"); std::string segmentName = FileSystem::toString(boost::filesystem::path(fileName).filename()); //-------------------------------- // Open the file and read 
the data //-------------------------------- // Open the file and seek to the start of data std::ifstream file(fileName.c_str()); if (!file.good()) throw std::runtime_error("MemoryMap::insertFile: cannot open file \""+StringUtility::cEscape(fileName)+"\""); if (optionalOffset) file.seekg(*optionalOffset); if (!file.good()) throw std::runtime_error("MemoryMap::insertFile: cannot seek in file \""+StringUtility::cEscape(fileName)+"\""); // If no file size was specified then try to get one, or delay getting one until later. On POSIX systems we can use stat // to get the file size, which is useful because infinite devices (like /dev/zero) will return zero. Otherwise we'll get // the file size by trying to read from the file. #if !defined(BOOST_WINDOWS) // not targeting Windows; i.e., not Microsoft C++ and not MinGW if (!optionalFSize) { struct stat sb; if (0==stat(fileName.c_str(), &sb)) optionalFSize = sb.st_size; } #endif // Limit the file size according to the virtual size. We never need to read more than what would be mapped. if (optionalVSize) { if (optionalFSize) { optionalFSize = std::min(*optionalFSize, *optionalVSize); } else { optionalFSize = optionalVSize; } } // Read the file data. If we know the file size then we can allocate a buffer and read it all in one shot, otherwise we'll // have to read a little at a time (only happens on Windows due to stat call above). 
uint8_t *data = NULL; // data read from the file size_t nRead = 0; // bytes of data actually allocated, read, and initialized in "data" if (optionalFSize) { // This is reasonably fast and not too bad on memory if (0 != *optionalFSize) { data = new uint8_t[*optionalFSize]; file.read((char*)data, *optionalFSize); nRead = file.gcount(); if (nRead != *optionalFSize) throw std::runtime_error("MemoryMap::insertFile: short read from \""+StringUtility::cEscape(fileName)+"\""); } } else { while (file.good()) { uint8_t page[4096]; file.read((char*)page, sizeof page); size_t n = file.gcount(); uint8_t *tmp = new uint8_t[nRead + n]; memcpy(tmp, data, nRead); memcpy(tmp+nRead, page, n); delete[] data; data = tmp; nRead += n; } optionalFSize = nRead; } // Choose virtual size if (!optionalVSize) { ASSERT_require(optionalFSize); optionalVSize = optionalFSize; } // Choose accessibility if (!optionalAccess) { #ifdef BOOST_WINDOWS optionalAccess = READABLE | WRITABLE; #else unsigned a = 0; if (0==::access(fileName.c_str(), R_OK)) a |= READABLE; if (0==::access(fileName.c_str(), W_OK)) a |= WRITABLE; if (0==::access(fileName.c_str(), X_OK)) a |= EXECUTABLE; optionalAccess = a; #endif } // Find a place to map the file. if (!optionalVa) { ASSERT_require(optionalVSize); optionalVa = findFreeSpace(*optionalVSize); } // Adjust the memory map ASSERT_require(optionalVa); ASSERT_require(optionalVSize); ASSERT_require(optionalAccess); ASSERT_require(nRead <= *optionalVSize); if (0 == *optionalVSize) return AddressInterval(); // empty AddressInterval interval = AddressInterval::baseSize(*optionalVa, *optionalVSize); insert(interval, Segment::anonymousInstance(interval.size(), *optionalAccess, segmentName)); size_t nCopied = at(interval.least()).limit(nRead).write(data).size(); ASSERT_always_require(nRead==nCopied); // better work since we just created the segment! return interval; }