Example #1
0
/** Returns the parts of this section that have been referenced.
 *
 *  The file-wide referenced extents are obtained from the containing file and clipped to this section.  Each returned
 *  interval is expressed relative to the start of the section (i.e., the section's file offset has been subtracted).  A
 *  zero-sized section yields an empty set. */
AddressIntervalSet
SgAsmGenericSection::get_referenced_extents() const
{
    AddressIntervalSet result;
    if (get_size() == 0)
        return result;

    const AddressInterval sectionExtent = AddressInterval::baseSize(get_offset(), get_size());
    const AddressIntervalSet &referencedInFile = get_file()->get_referenced_extents();
    BOOST_FOREACH (const AddressInterval &ref, referencedInFile.intervals()) {
        // Referenced area lies entirely inside the section: just translate it to a section-relative offset.
        if (sectionExtent.isContaining(ref)) {
            result.insert(AddressInterval::baseSize(ref.least() - sectionExtent.least(), ref.size()));
            continue;
        }

        // Referenced area does not touch the section at all.
        if (ref.isLeftOf(sectionExtent) || ref.isRightOf(sectionExtent))
            continue;

        // Referenced area covers the whole section; nothing more can be added after this.
        if (ref.isContaining(sectionExtent)) {
            result.insert(AddressInterval::baseSize(0, get_size()));
            break;
        }

        // Partial overlap: keep only the part that falls inside the section.
        if (ref.least() < sectionExtent.least()) {
            // Overlaps the beginning of the section
            result.insert(AddressInterval::baseSize(0, ref.least() + ref.size() - sectionExtent.least()));
        } else if (ref.greatest() > sectionExtent.greatest()) {
            // Overlaps the end of the section
            result.insert(AddressInterval::baseSize(ref.least() - sectionExtent.least(),
                                                    sectionExtent.least() + get_size() - ref.least()));
        } else {
            ASSERT_not_reachable("invalid extent overlap category");
        }
    }
    return result;
}
Example #2
0
/** Searches for the first byte that matches any of a set of values.
 *
 *  Starting at the least address of @p limits, reads bytes that satisfy the @p requiredPerms and @p prohibitedPerms
 *  constraints and returns the address of the first byte whose value appears in @p bytesToFind.  Returns nothing if
 *  @p limits is empty, if @p bytesToFind is empty, or if no such byte is found at or below the greatest address of
 *  @p limits. */
Sawyer::Optional<rose_addr_t>
MemoryMap::findAny(const AddressInterval &limits, const std::vector<uint8_t> &bytesToFind,
                   unsigned requiredPerms, unsigned prohibitedPerms) const
{
    if (!limits || bytesToFind.empty())
        return Sawyer::Nothing();

    // Read a bunch of bytes at a time.  If the buffer size is large then we'll have fewer read calls before finding a match,
    // which is good if a match is unlikely.  But if a match is likely, then it's better to use a smaller buffer so we don't
    // read more than necessary to find a match.  We'll compromise by starting with a small buffer that grows up to some
    // limit.
    size_t bufsize = 8;                                 // initial buffer size
    uint8_t buffer[4096];                               // full buffer

    Sawyer::Optional<rose_addr_t> atVa = this->at(limits.least()).require(requiredPerms).prohibit(prohibitedPerms).next();
    while (atVa && *atVa <= limits.greatest()) {
        // Bound the read by the distance to the end of the limits.  This is computed from addresses on every iteration
        // (rather than from a running byte counter) so it stays correct when the limits are the entire address space,
        // whose size is not representable, and so that it never depends on the width of size_t.
        const rose_addr_t lastOffset = limits.greatest() - *atVa; // offset of the last byte we're allowed to look at
        size_t nwanted = bufsize;
        if (lastOffset < nwanted)
            nwanted = (size_t)lastOffset + 1;           // cannot overflow since lastOffset < bufsize <= sizeof buffer

        size_t nread = at(*atVa).limit(nwanted).require(requiredPerms).prohibit(prohibitedPerms).read(buffer).size();
        assert(nread > 0);                              // because of the next() calls
        for (size_t offset=0; offset<nread; ++offset) {
            if (std::find(bytesToFind.begin(), bytesToFind.end(), buffer[offset]) != bytesToFind.end())
                return *atVa + offset;                  // found
        }

        // Stop once the last permitted address has been examined.  Besides being the natural termination condition, this
        // prevents "*atVa+nread" from wrapping around to zero when the limits end at the top of the address space.
        if (nread - 1 >= lastOffset)
            break;
        atVa = at(*atVa+nread).require(requiredPerms).prohibit(prohibitedPerms).next();
        bufsize = std::min(2*bufsize, sizeof buffer);   // use a larger buffer next time if possible
    }

    return Sawyer::Nothing();
}
Example #3
0
 // Examines every address of the interval, one byte at a time.  Returns false as soon as a byte is not an ASCII character
 // according to self->isAsciiCharacter; otherwise counts each accepted byte in nBytes and returns true after the last one.
 bool operator()(const MemoryMap::Super &map, const AddressInterval &interval) {
     for (rose_addr_t va = interval.least(); va <= interval.greatest(); ++va) {
         uint8_t byte;
         map.at(va).limit(1).read(&byte);
         if (!self->isAsciiCharacter(byte))
             return false;
         ++nBytes;
         if (va == interval.greatest())
             break;                                  // leave before ++va so the address cannot wrap around
     }
     return true;
 }
Example #4
0
/** Searches for a sequence of bytes.
 *
 *  Looks for the first place at or after the least address of @p interval where the bytes of @p sequence appear at
 *  consecutive addresses, with the whole match lying at or below the greatest address of @p interval.  Returns the address
 *  of the first byte of the match, or nothing if @p interval is empty or no match is found.  An empty @p sequence matches
 *  at the least address of @p interval.  Sequences longer than the internal buffer (4096 bytes) are not supported. */
Sawyer::Optional<rose_addr_t>
MemoryMap::findSequence(const AddressInterval &interval, const std::vector<uint8_t> &sequence) const {
    if (interval.isEmpty())
        return Sawyer::Nothing();
    if (sequence.empty())
        return interval.least();
    std::vector<uint8_t> buffer(4096);                  // size is arbitrary
    ASSERT_require2(sequence.size() <= buffer.size(), "long sequences not implemented yet");
    rose_addr_t searchVa = interval.least();
    while (AddressInterval window = atOrAfter(searchVa).read(buffer)) {
        if (window.least() > interval.greatest())
            break;                                      // everything from here on is outside the search interval

        // Only the part of the window that lies inside the search interval may participate in a match.
        size_t nUsable = window.size();
        const rose_addr_t room = interval.greatest() - window.least(); // addresses after window.least() still in interval
        if (room < nUsable)
            nUsable = (size_t)room + 1;                 // cannot overflow since room < nUsable <= buffer.size()

        for (size_t offset=0; offset+sequence.size()<=nUsable; ++offset) {
            if (std::equal(sequence.begin(), sequence.end(), buffer.begin()+offset))
                return window.least() + offset;
        }

        if (window.greatest() >= interval.greatest() || window.greatest() == hull().greatest())
            break;                                      // nothing left to search; also avoids possible overflow below

        if (window.size()==buffer.size()) {
            // The buffer filled up, so a match might straddle the window boundary.  Back up just far enough that the last
            // sequence.size()-1 bytes of this window are re-read at the start of the next one.  This always advances by at
            // least one byte because sequence.size() <= buffer.size().
            searchVa = window.greatest() - sequence.size() + 2;
        } else {
            // The read stopped early, so no match can span the end of this window.
            searchVa = window.greatest() + 1;
        }
    }
    return Sawyer::Nothing();
}
Example #5
0
 // Scans the interval one byte at a time, tracking the current run of ASCII characters in startVa/nChars.  A run carried
 // over from an earlier call is discarded unless this interval begins exactly where that run ended.  Returns false as soon
 // as the run reaches minChars characters; returns true if the end of the interval is reached first.
 bool operator()(const MemoryMap::Super &map, const AddressInterval &interval) {
     const rose_addr_t firstVa = interval.least();
     if (startVa + nChars != firstVa)
         nChars = 0;                                 // not adjacent to the previous run, so start over

     for (rose_addr_t va = firstVa; va <= interval.greatest(); ++va) {
         uint8_t byte;
         map.at(va).limit(1).read(&byte);
         if (!self->isAsciiCharacter(byte)) {
             nChars = 0;
         } else {
             ++nChars;
             if (1 == nChars)
                 startVa = va;                       // first character of a new run
             if (nChars >= minChars)
                 return false;
         }
         if (va == interval.greatest())
             break;                                  // leave before ++va so the address cannot wrap around
     }
     return true;
 }
Example #6
0
// Loads the specimens named on the command line into a memory map and then, for every "step"-th mapped address within the
// configured limits, asks the magic number analyzer to identify the data at that address.  Anything identified as other
// than plain "data" is printed to standard output along with its address and leading bytes.
int
main(int argc, char *argv[]) {
    ROSE_INITIALIZE;

    BinaryAnalysis::Partitioner2::Engine engine;
    Settings settings;
    std::vector<std::string> specimenNames = parseCommandLine(argc, argv, engine, settings /*in,out*/);

    BinaryAnalysis::MagicNumber analyzer;
    analyzer.maxBytesToCheck(settings.maxBytes);

    MemoryMap::Ptr map = engine.loadSpecimens(specimenNames);
    map->dump(mlog[INFO]);

    size_t step = std::max(size_t(1), settings.step);
    AddressInterval limits = settings.limits.isEmpty() ? map->hull() : (settings.limits & map->hull());
    Sawyer::Container::IntervalSet<AddressInterval> addresses(*map);
    addresses.intersect(limits);
    size_t nPositions = addresses.size() / step;
    mlog[INFO] <<"approximately " <<StringUtility::plural(nPositions, "positions") <<" to check\n";

    // An empty interval has no least/greatest address, so there is nothing to scan in that case.
    if (!limits.isEmpty()) {
        Sawyer::ProgressBar<size_t> progress(nPositions, mlog[INFO], "positions");
        for (rose_addr_t va=limits.least();
             va<=limits.greatest() && map->atOrAfter(va).next().assignTo(va);
             va+=step, ++progress) {
            if (va > limits.greatest())
                break;                                  // next mapped address is beyond the requested limits
            std::string magicString = analyzer.identify(map, va);
            if (magicString!="data") {                  // runs home to Momma when it gets confused
                uint8_t buf[8];
                size_t nBytes = map->at(va).limit(sizeof buf).read(buf).size();
                std::cout <<StringUtility::addrToString(va) <<" |" <<leadingBytes(buf, nBytes) <<" | " <<magicString <<"\n";
            }
            if (limits.greatest() - va < step)
                break;                                  // next step would pass the limits or overflow the address space
        }
    }
    return 0;
}
Example #7
0
/** Removes runs of zero bytes from the map.
 *
 *  Scans all memory that is both readable and executable and erases from the map every run of consecutive addresses whose
 *  bytes are all zero and whose length is at least @p minsize.  Runs are never joined across addresses that were not
 *  scanned.  The map is modified only after scanning has finished. */
void
MemoryMap::eraseZeros(size_t minsize)
{
    if (isEmpty())
        return;
    unsigned permissions = READABLE | EXECUTABLE;       // access permissions that must be present
    AddressIntervalSet toRemove;                        // to save up intervals until we're done iterating
    AddressInterval zeroInterval;                       // run of zeros currently being accumulated, if any
    uint8_t buf[8192];
    rose_addr_t va = hull().least();
    while (AddressInterval accessed = atOrAfter(va).require(permissions).limit(sizeof buf).read(buf)) {
        for (size_t offset=0; offset<accessed.size(); ++offset) {
            const rose_addr_t byteVa = accessed.least() + offset; // virtual address of buf[offset]
            if (0 == buf[offset]) {
                if (zeroInterval.isEmpty()) {
                    zeroInterval = AddressInterval(byteVa);
                } else if (byteVa - zeroInterval.greatest() > 1) {
                    // This zero is not adjacent to the run in progress (some addresses between them were skipped), so
                    // finish the old run and start a new one here.  Addresses are visited in increasing order, so byteVa
                    // is always greater than zeroInterval.greatest() and the subtraction cannot underflow.
                    if (zeroInterval.size() >= minsize)
                        toRemove.insert(zeroInterval);
                    zeroInterval = AddressInterval(byteVa);
                } else {
                    zeroInterval = AddressInterval::hull(zeroInterval.least(), byteVa);
                }
            } else if (!zeroInterval.isEmpty()) {
                if (zeroInterval.size() >= minsize)
                    toRemove.insert(zeroInterval);
                zeroInterval = AddressInterval();
            }
        }
        if (accessed.greatest() == hull().greatest())
            break;                                      // prevent overflow in next statement
        va = accessed.greatest() + 1;                   // resume just past what was read, even if the read skipped a hole
    }
    if (!zeroInterval.isEmpty() && zeroInterval.size() >= minsize)
        toRemove.insert(zeroInterval);
    BOOST_FOREACH (const AddressInterval &interval, toRemove.intervals())
        erase(interval);
}
Example #8
0
// Converts an AddressInterval to an Extent having the same inclusive least and greatest addresses.  An empty interval
// becomes a default-constructed Extent.
Extent toExtent(const AddressInterval &x) {
    if (x.isEmpty())
        return Extent();
    return Extent::inin(x.least(), x.greatest());
}
Example #9
0
// FIXME[Robb P. Matzke 2014-10-09]: No idea how to do this in Microsoft Windows!
//
// Inserts the memory of a running process into this map.  The locator string has the form ":[OPTIONS]:PID" where OPTIONS
// is a comma-separated list whose only recognized member is "noattach".  Unless "noattach" is specified, the process is
// attached with ptrace for the duration of the call and detached again on return or exception.  The mapped regions are
// discovered from /proc/PID/maps and their contents are copied from /proc/PID/mem into new anonymous segments.  If a
// region cannot be read completely then only the part that was read is inserted.  Throws insertProcessError (via the
// insertProcessError helper) for syntax errors and for failures to attach to or open the process; on Windows it always
// throws std::runtime_error.
void
MemoryMap::insertProcess(const std::string &locatorString) {
#ifdef BOOST_WINDOWS                                    // FIXME[Robb P. Matzke 2014-10-10]
    throw std::runtime_error("MemoryMap::insertProcess is not available on Microsoft Windows");
#else

    // Resources that need to be cleaned up on return or exception
    struct T {
        FILE *mapsFile;                                 // file for /proc/xxx/maps
        char *buf;                                      // line read from /proc/xxx/maps
        size_t bufsz;                                   // bytes allocated for "buf"
        int memFile;                                    // file for /proc/xxx/mem
        pid_t resumeProcess;                            // subordinate process to resume
        T(): mapsFile(NULL), buf(NULL), bufsz(0), memFile(-1), resumeProcess(-1) {}
        ~T() {
            if (mapsFile)
                fclose(mapsFile);
            if (buf)
                free(buf);
            if (memFile>=0)
                close(memFile);
            if (resumeProcess != -1)
                ptrace(PTRACE_DETACH, resumeProcess, 0, 0);
        }
    } local;

    // Parse the locator string.
    bool doAttach = true;
    const char *s = locatorString.c_str();
    if (':'!=*s++)
        throw insertProcessError(locatorString, "initial colon expected");
    while (':'!=*s) {
        if (boost::starts_with(s, "noattach")) {
            doAttach = false;
            s += strlen("noattach");
        } else {
            // Also reached at the end of the string, so a missing second colon terminates the loop here.
            throw insertProcessError(locatorString, "unknown option beginning at ...\"" + std::string(s) + "\"");
        }
        if (','==*s)
            ++s;
    }
    if (':'!=*s++)
        throw insertProcessError(locatorString, "second colon expected");
    
    int pid = parseInteger(locatorString, s /*in,out*/, "process ID expected");

    // We need to attach to the process with ptrace before we can read from its /proc/xxx/mem file.  We'll have
    // to detach if anything goes wrong or when we finish.
    if (doAttach) {
        if (-1 == ptrace(PTRACE_ATTACH, pid, 0, 0))
            throw insertProcessError(locatorString, "cannot attach: " + std::string(strerror(errno)));
        int wstat = 0;
        if (-1 == waitpid(pid, &wstat, 0))
            throw insertProcessError(locatorString, "cannot wait: " + std::string(strerror(errno)));
        if (WIFEXITED(wstat))
            throw insertProcessError(locatorString, "process exited before it could be read");
        if (WIFSIGNALED(wstat))
            throw insertProcessError(locatorString, "process died with " +
                                     boost::to_lower_copy(std::string(strsignal(WTERMSIG(wstat)))) +
                                     " before it could be read");
        local.resumeProcess = pid;                      // from here on the destructor of "local" detaches
        ASSERT_require2(WIFSTOPPED(wstat) && WSTOPSIG(wstat)==SIGSTOP, "subordinate process did not stop");
    }

    // Prepare to read subordinate's memory
    std::string mapsName = "/proc/" + StringUtility::numberToString(pid) + "/maps";
    if (NULL==(local.mapsFile = fopen(mapsName.c_str(), "r")))
        throw insertProcessError(locatorString, "cannot open " + mapsName + ": " + strerror(errno));
    std::string memName = "/proc/" + StringUtility::numberToString(pid) + "/mem";
    if (-1 == (local.memFile = open(memName.c_str(), O_RDONLY)))
        throw insertProcessError(locatorString, "cannot open " + memName + ": " + strerror(errno));

    // Read each line from the /proc/xxx/maps to figure out what memory is mapped in the subordinate process. The format for
    // the part we're interested in is /^([0-9a-f]+)-([0-9a-f]+) ([-r][-w][-x])/ where $1 is the inclusive starting address, $2
    // is the exclusive ending address, and $3 are the permissions.
    int mapsFileLineNumber = 0;
    while (rose_getline(&local.buf, &local.bufsz, local.mapsFile)>0) {
        ++mapsFileLineNumber;

        // Begin address
        char *s=local.buf, *rest=s;
        errno = 0;
        rose_addr_t begin = rose_strtoull(s, &rest, 16);
        if (errno!=0 || rest==s || '-'!=*rest) {
            throw insertProcessError(locatorString, mapsName + " syntax error for beginning address at line " +
                                     StringUtility::numberToString(mapsFileLineNumber) + ": " + local.buf);
        }

        // End address
        s = rest+1;
        rose_addr_t end = rose_strtoull(s, &rest, 16);
        if (errno!=0 || rest==s || ' '!=*rest) {
            throw insertProcessError(locatorString, mapsName + " syntax error for ending address at line " +
                                     StringUtility::numberToString(mapsFileLineNumber) + ": " + local.buf);
        }
        if (begin >= end) {
            throw insertProcessError(locatorString, mapsName + " invalid address range at line " +
                                     StringUtility::numberToString(mapsFileLineNumber) + ": " + local.buf);
        }

        // Access permissions
        s = ++rest;
        if ((s[0]!='r' && s[0]!='-') || (s[1]!='w' && s[1]!='-') || (s[2]!='x' && s[2]!='-')) {
            throw insertProcessError(locatorString, mapsName + " invalid access permissions at line " +
                                     StringUtility::numberToString(mapsFileLineNumber) + ": " + local.buf);
        }
        unsigned accessibility = ('r'==s[0] ? READABLE : 0) | ('w'==s[1] ? WRITABLE : 0) | ('x'==s[2] ? EXECUTABLE : 0);

        // Skip over unused fields
        for (size_t nSpaces=0; nSpaces<4 && *s; ++s) {
            if (isspace(*s))
                ++nSpaces;
        }
        while (isspace(*s)) ++s;

        // Segment name according to the kernel
        std::string kernelSegmentName;
        while (*s && !isspace(*s))
            kernelSegmentName += *s++;

        // Create memory segment, but don't insert it until after we read all the data
        std::string segmentName = "proc:" + StringUtility::numberToString(pid);
        AddressInterval segmentInterval = AddressInterval::baseSize(begin, end-begin);
        Segment segment = Segment::anonymousInstance(segmentInterval.size(), accessibility,
                                                     segmentName + "(" + kernelSegmentName + ")");

        // Copy data from the subordinate process into our memory segment
        if (-1 == lseek(local.memFile, begin, SEEK_SET))
            throw insertProcessError(locatorString, memName + " seek failed: " + strerror(errno));
        size_t nRemain = segmentInterval.size();
        rose_addr_t segmentBufferOffset = 0;
        while (nRemain > 0) {
            uint8_t chunkBuf[8192];
            size_t chunkSize = std::min(nRemain, sizeof chunkBuf);
            ssize_t nRead = ::read(local.memFile, chunkBuf, chunkSize);
            if (-1==nRead) {
                if (EINTR==errno)
                    continue;
                //mlog[WARN] <<strerror(errno) <<" during read from " <<memName <<" for segment " <<kernelSegmentName
                //           <<" at " <<segmentInterval <<"\n";
                // NOTE(review): the segment was already created above, so this suffix does not appear to reach its name.
                segmentName += "[" + boost::to_lower_copy(std::string(strerror(errno))) + "]";
                break;
            } else if (0==nRead) {
                //mlog[WARN] <<"short read from " <<memName <<" for segment " <<kernelSegmentName <<" at " <<segmentInterval <<"\n";
                segmentName += "[short read]";
                break;
            }
            rose_addr_t nWrite = segment.buffer()->write(chunkBuf, segmentBufferOffset, nRead);
            ASSERT_always_require(nWrite == (rose_addr_t)nRead);

            // read() may legitimately return fewer bytes than requested, so advance by what was actually transferred.
            // Advancing by the requested size would leave gaps in the buffer and miscount the bytes still to read.
            nRemain -= (size_t)nRead;
            segmentBufferOffset += (rose_addr_t)nRead;
        }
        if (nRemain > 0) {
            // If a read failed, map only what we could read
            segmentInterval = AddressInterval::baseSize(segmentInterval.least(), segmentInterval.size()-nRemain);
        }

        // Insert segment into memory map
        if (!segmentInterval.isEmpty())
            insert(segmentInterval, segment);
    }
#endif
}
Example #10
0
// Insert file from a locator string of the form:
//   :[VA][+VSIZE][=PERMS]:[OFFSET][+FSIZE]:FILENAME
//
// VA is the virtual address at which to map the file (default is to choose free space), VSIZE is the size of the mapped
// region (default is the number of bytes read from the file), and PERMS is any of the letters "r", "w", "x" in that order
// (default comes from the file's own accessibility, or read+write on Windows).  OFFSET is the position in the file at
// which to start reading and FSIZE is the number of bytes to read; at most VSIZE bytes are ever read.  Returns the
// interval that was mapped, which is empty if the virtual size turned out to be zero.  Throws insertFileError (via the
// insertFileError helper) for locator syntax errors and std::runtime_error if the file cannot be opened, positioned, or
// read.
AddressInterval
MemoryMap::insertFile(const std::string &locatorString) {

    //--------------------------------------
    // Parse the parts of the locator string
    //--------------------------------------

    // Leading colon
    const char *s = locatorString.c_str();
    if (':'!=*s++)
        throw insertFileError(locatorString, "not a locator string");

    // Virtual address
    Sawyer::Optional<rose_addr_t> optionalVa;
    if (isdigit(*s))
        optionalVa = parseInteger(locatorString, s /*in,out*/, "virtual address expected");

    // Virtual size
    Sawyer::Optional<size_t> optionalVSize;
    if ('+'==*s) {
        ++s;
        optionalVSize = parseInteger(locatorString, s /*in,out*/, "virtual size expected");
    }

    // Virtual accessibility
    Sawyer::Optional<unsigned> optionalAccess;
    if ('='==*s) {
        ++s;
        unsigned a = 0;
        if ('r'==*s) {
            ++s;
            a |= READABLE;
        }
        if ('w'==*s) {
            ++s;
            a |= WRITABLE;
        }
        if ('x'==*s) {
            ++s;
            a |= EXECUTABLE;
        }
        optionalAccess = a;
    }

    // Second colon
    if (':'!=*s) {
        if (*s && optionalAccess)
            throw insertFileError(locatorString, "invalid access spec");
        throw insertFileError(locatorString, "syntax error before second colon");
    }
    ++s;

    // File offset
    Sawyer::Optional<size_t> optionalOffset;
    if (isdigit(*s))
        optionalOffset = parseInteger(locatorString, s /*in,out*/, "file offset expected");
    
    // File size
    Sawyer::Optional<size_t> optionalFSize;
    if ('+'==*s) {
        ++s;
        optionalFSize = parseInteger(locatorString, s /*in,out*/, "file size expected");
    }

    // Third colon
    if (':'!=*s)
        throw insertFileError(locatorString, "syntax error before third colon");
    ++s;

    // File name.  Take everything up to the real end of the locator string (not just up to the first NUL) so that a name
    // containing an embedded NUL character is detected and rejected instead of being silently truncated.
    if (!*s)
        throw insertFileError(locatorString, "file name expected after third colon");
    std::string fileName(s, locatorString.c_str() + locatorString.size());
    if (fileName.size()!=strlen(fileName.c_str()))
        throw insertFileError(locatorString, "invalid file name");
    std::string segmentName = FileSystem::toString(boost::filesystem::path(fileName).filename());

    //-------------------------------- 
    // Open the file and read the data
    //-------------------------------- 

    // Open the file and seek to the start of data.  Binary mode is required so that the bytes mapped into memory are
    // exactly the bytes stored in the file on platforms that distinguish text from binary streams.
    std::ifstream file(fileName.c_str(), std::ios::in | std::ios::binary);
    if (!file.good())
        throw std::runtime_error("MemoryMap::insertFile: cannot open file \""+StringUtility::cEscape(fileName)+"\"");
    if (optionalOffset)
        file.seekg(*optionalOffset);
    if (!file.good())
        throw std::runtime_error("MemoryMap::insertFile: cannot seek in file \""+StringUtility::cEscape(fileName)+"\"");

    // If no file size was specified then try to get one, or delay getting one until later.  On POSIX systems we can use stat
    // to get the file size, which is useful because infinite devices (like /dev/zero) will return zero.  Otherwise we'll get
    // the file size by trying to read from the file.
#if !defined(BOOST_WINDOWS)                             // not targeting Windows; i.e., not Microsoft C++ and not MinGW
    if (!optionalFSize) {
        struct stat sb;
        if (0==stat(fileName.c_str(), &sb)) {
            // Only the bytes at and after the starting offset are available to be read.
            size_t fileSize = sb.st_size;
            size_t nSkipped = 0;
            if (optionalOffset)
                nSkipped = *optionalOffset;
            optionalFSize = fileSize > nSkipped ? fileSize - nSkipped : size_t(0);
        }
    }
#endif

    // Limit the file size according to the virtual size.  We never need to read more than what would be mapped.
    if (optionalVSize) {
        if (optionalFSize) {
            optionalFSize = std::min(*optionalFSize, *optionalVSize);
        } else {
            optionalFSize = optionalVSize;
        }
    }

    // Read the file data.  If we know the file size then we can allocate a buffer and read it all in one shot, otherwise we'll
    // have to read a little at a time (only happens on Windows due to stat call above).  The data lives in a vector so that
    // it is released on every path out of this function, including the exceptions thrown below.
    std::vector<uint8_t> data;                          // data read from the file
    size_t nRead = 0;                                   // bytes of "data" that were actually read from the file
    if (optionalFSize) {
        // This is reasonably fast and not too bad on memory
        if (0 != *optionalFSize) {
            data.resize(*optionalFSize);
            file.read((char*)&data[0], *optionalFSize);
            nRead = file.gcount();
            if (nRead != *optionalFSize)
                throw std::runtime_error("MemoryMap::insertFile: short read from \""+StringUtility::cEscape(fileName)+"\"");
        }
    } else {
        while (file.good()) {
            uint8_t page[4096];
            file.read((char*)page, sizeof page);
            size_t n = file.gcount();
            data.insert(data.end(), page, page+n);
            nRead += n;
        }
        optionalFSize = nRead;
    }

    // Choose virtual size
    if (!optionalVSize) {
        ASSERT_require(optionalFSize);
        optionalVSize = optionalFSize;
    }

    // Choose accessibility
    if (!optionalAccess) {
#ifdef BOOST_WINDOWS
        optionalAccess = READABLE | WRITABLE;
#else
        unsigned a = 0;
        if (0==::access(fileName.c_str(), R_OK))
            a |= READABLE;
        if (0==::access(fileName.c_str(), W_OK))
            a |= WRITABLE;
        if (0==::access(fileName.c_str(), X_OK))
            a |= EXECUTABLE;
        optionalAccess = a;
#endif
    }

    // Find a place to map the file.
    if (!optionalVa) {
        ASSERT_require(optionalVSize);
        optionalVa = findFreeSpace(*optionalVSize);
    }

    // Adjust the memory map
    ASSERT_require(optionalVa);
    ASSERT_require(optionalVSize);
    ASSERT_require(optionalAccess);
    ASSERT_require(nRead <= *optionalVSize);
    if (0 == *optionalVSize)
        return AddressInterval();                       // empty
    AddressInterval interval = AddressInterval::baseSize(*optionalVa, *optionalVSize);
    insert(interval, Segment::anonymousInstance(interval.size(), *optionalAccess, segmentName));
    uint8_t *bytes = data.empty() ? NULL : &data[0];    // nothing was read if empty, in which case nRead is zero
    size_t nCopied = at(interval.least()).limit(nRead).write(bytes).size();
    ASSERT_always_require(nRead==nCopied);              // better work since we just created the segment!
    return interval;
}