예제 #1
0
/**
 * Record the bytes sbuf[pos, pos+len) as a feature, optionally together
 * with a window of surrounding context taken from the same buffer.
 *
 * @param sbuf - buffer containing the feature
 * @param pos  - offset of the feature within sbuf
 * @param len  - feature length; truncated if it runs past the end of sbuf
 */
void feature_recorder::write_buf(const sbuf_t &sbuf,size_t pos,size_t len)
{
#ifdef DEBUG_SCANNER
    if(debug & DEBUG_SCANNER){
        std::cerr << "*** write_buf " << name << " sbuf=" << sbuf << " pos=" << pos << " len=" << len << "\n";
        /* Debugging aid: change the constant below to the pos at which a
         * crash occurs and put a breakpoint on the std::cerr line.
         */
        if(pos==9999999){
            std::cerr << "Imagine that\n";
        }
    }
#endif

    /* Sanity check: a position at or past the end of the buffer is an error */
    if(pos >= sbuf.bufsize){
        std::cerr << "*** write_buf: WRITE OUTSIDE BUFFER. "
                  << " pos="  << pos
                  << " sbuf=" << sbuf << "\n";
        return;
    }

    /* Inside the buffer but past the page: this is the margin, which
     * will be processed again, so there is nothing to do here.
     */
    if(pos >= sbuf.pagesize){
        return;
    }

    /* A request that runs past bufsize is trimmed to fit */
    if(pos+len > sbuf.bufsize){
        len = sbuf.bufsize - pos;
    }

    std::string feature_text = sbuf.substr(pos,len);
    std::string context_text;

    if(!(flags & FLAG_NO_CONTEXT)){
        /* Build the context window around the feature, clipped to the buffer */
        size_t ctx_begin = 0;
        if(pos > context_window_before){
            ctx_begin = pos - context_window_before;
        }
        size_t ctx_end = pos+len+context_window_after;
        if(ctx_end > sbuf.bufsize){
            ctx_end = sbuf.bufsize;
        }
        assert(ctx_begin<=ctx_end);
        context_text = sbuf.substr(ctx_begin,ctx_end-ctx_begin);
    }
    this->write(sbuf.pos0+pos,feature_text,context_text);
#ifdef DEBUG_SCANNER
    if(debug & DEBUG_SCANNER){
        std::cerr << ".\n";
    }
#endif
}
예제 #2
0
/**
 * Read up to `count` UTF-32 code units from the start of sbuf and return
 * them as unvalidated UTF-8.
 *
 * @param sbuf       - buffer to read from, starting at offset 0
 * @param count      - number of 32-bit code units to read
 * @param byte_order - byte order used to decode each 32-bit value
 * @return the converted string; "" if the input is an uninteresting
 *         repeated-character or repeated-pair sequence. Reading stops
 *         early, keeping what was converted, when the buffer runs out.
 */
string get_possible_utf32(const sbuf_t &sbuf, size_t count, sbuf_t::byte_order_t byte_order) {

    // check for sequence
    if (chars_match(sbuf, count) || char_pairs_match(sbuf, count)) {
        // this is an uninteresting sequence, so return ""
        return "";
    }
    std::string utf8_string;
    std::back_insert_iterator<std::basic_string<char> > result = back_inserter(utf8_string);

    // size_t index: matches the type of count and avoids 32-bit overflow of i*4
    for (size_t i=0; i<count; i++) {
        try {
            const uint32_t code_point = sbuf.get32u(i * 4, byte_order);
            try {
                result = utf8::append(code_point, result);
            } catch (const utf8::invalid_code_point &) {

                // invalid code point so put in the byte values directly,
                // low byte first, disregarding endian convention.
                // The shift must happen BEFORE narrowing to uint8_t;
                // narrowing first would make the three upper bytes always 0.
                utf8_string += (uint8_t)(code_point);
                utf8_string += (uint8_t)(code_point >> 8);
                utf8_string += (uint8_t)(code_point >> 16);
                utf8_string += (uint8_t)(code_point >> 24);
            }
        } catch (const sbuf_t::range_exception_t &) {
            // at end of buffer
            break;
        }
    }
    return utf8_string;
}
예제 #3
0
/**
 * Return the first `count` bytes of sbuf as quoted UTF-8, or an empty
 * string when the bytes form an uninteresting repeated-character or
 * repeated-pair sequence.
 */
static string get_possible_utf8(const sbuf_t &sbuf, size_t count) {
    std::string quoted;
    const bool uninteresting = chars_match(sbuf, count) || char_pairs_match(sbuf, count);
    if (!uninteresting) {
        sbuf.getUTF8WithQuoting(0, count, quoted);
    }
    // quoted is still empty when the sequence was uninteresting
    return quoted;
}
예제 #4
0
/**
 * Heuristically classify a buffer as a FAT directory entry.
 *
 * @param sbuf - buffer that must be exactly sizeof(fatfs_dentry) bytes long
 * @return
 *   0  if the entry is invalid (including a buffer of the wrong size)
 *   1  if the entry looks like a valid dentry
 *   2  if the entry looks like a valid LFN (VFAT long file name) entry
 *   10 if the first name byte is 0, i.e. the entry is available and no
 *      subsequent entry is in use
 *   20 if every byte of the entry has the same value (e.g. all null),
 *      so there are no more valid entries
 *
 * http://en.wikipedia.org/wiki/File_Allocation_Table
 */
int valid_fat_directory_entry(const sbuf_t &sbuf)
{
    if(sbuf.bufsize != sizeof(fatfs_dentry)) return 0; // buffer must be exactly one directory entry
    /* If the entire directory entry is the same byte value, report it with the distinct code 20 */
    if(sbuf.is_constant(sbuf[0])) return 20; // constant-filled entry

    const fatfs_dentry &dentry = *(sbuf.get_struct_ptr<fatfs_dentry>(0));
    if((dentry.attrib & ~FATFS_ATTR_ALL) != 0) return 0; // invalid attribute bit set
    if(dentry.attrib == FATFS_ATTR_LFN) {
        /* This may be a VFAT long file name; reinterpret the same bytes as an LFN entry */
        const fatfs_dentry_lfn &lfn = *(const fatfs_dentry_lfn *)sbuf.buf;
        if((lfn.seq & ~0x40) > 10) return 0;	// invalid sequence number (bit 0x40 is masked off before the range check)
        if(lfn.reserved1 != 0) return 0; // invalid reserved1 (LDIR_Type)
        if(fat16int(lfn.reserved2)!=0) return 0; // LDIR_FstClusLO "Must be ZERO"
        return 2;				 // looks okay
    } else {
        if(dentry.name[0]==0) return 10; // "Entry is available and no subsequent entry is in use. "

        /* Look for combinations of times, dates and attributes that have been invalid */
        if((dentry.attrib & FATFS_ATTR_LFN)==FATFS_ATTR_LFN &&
                (dentry.attrib != FATFS_ATTR_LFN)) {
            return 0;			// all LFN bits set together with additional attribute bits
        }
        if((dentry.attrib & FATFS_ATTR_DIRECTORY) && (dentry.attrib & FATFS_ATTR_ARCHIVE)) {
            return 0;			// can't have both DIRECTORY and ARCHIVE set
        }

        if(!valid_fat_dentry_name(dentry.name,dentry.ext)) return 0; // invalid name
        if(dentry.ctimeten>199) return 0;	// create time fine resolution, 0..199
        /* Decode the 16-bit on-disk time and date fields */
        uint16_t ctime = fat16int(dentry.ctime);
        uint16_t cdate = fat16int(dentry.cdate);
        uint16_t adate = fat16int(dentry.adate);
        uint16_t wtime = fat16int(dentry.wtime);
        uint16_t wdate = fat16int(dentry.wdate);
        /* A zero create/access field is tolerated; a non-zero one must be well formed */
        if(ctime && !FATFS_ISTIME(ctime)) return 0; // ctime is null for directories
        if(cdate && !FATFS_ISDATE(cdate)) return 0; // cdate is null for directories
        if(adate && !FATFS_ISDATE(adate)) return 0; // adate is null for directories
        if(adate==0 && ctime==0 && cdate==0) {
            /* With no create/access stamps at all, only a volume label is accepted */
            if(dentry.attrib & FATFS_ATTR_VOLUME) return 1; // volume name
            return 0;					    // not a volume name
        }
        /* The write time and date are always validated, even when zero */
        if(!FATFS_ISTIME(wtime)) return 0; // invalid wtime
        if(!FATFS_ISDATE(wdate)) return 0; // invalid wdate
        /* Identical non-zero values in unrelated fields suggest repeating data rather than real timestamps */
        if(ctime && ctime==cdate) return 0; // highly unlikely
        if(wtime && wtime==wdate) return 0; // highly unlikely
        if(adate && adate==ctime) return 0; // highly unlikely
        if(adate && adate==wtime) return 0; // highly unlikely
    }
    return 1;
}
예제 #5
0
/**
 * Read `count` UTF-16 code units from the start of sbuf, honoring
 * byte_order, and return them as unvalidated UTF-8.
 *
 * Returns "" when the input is an uninteresting repeated-character or
 * repeated-pair sequence.
 */
string get_possible_utf16(const sbuf_t sbuf, size_t count, sbuf_t::byte_order_t byte_order) {

    // reject uninteresting sequences up front
    const bool uninteresting = chars_match(sbuf, count) || char_pairs_match(sbuf, count);
    if (uninteresting) {
        return "";
    }

    // decode into a wide string using the requested byte order
    wstring decoded;
    sbuf.getUTF16(0, count, byte_order, decoded);

    // re-encode the wide string as UTF-8
    string utf8_string = all_utf16to8(decoded);

#ifdef DEBUG
    cout << "exif_entry.get_possible_utf16 utf8_string (escaped): '" << validateOrEscapeUTF8(utf8_string, true, true) << "'\n";
#endif
    return utf8_string;
}
예제 #6
0
/* Write the contents of sbuf to the file fname and append an XML
 * description of the result to xmlattr.
 *
 * On success a <byte_run> element (file offset, length, file name and
 * MD5 digest) is appended. If the write is short or close() fails, a
 * self-closing <write_error/> element is appended instead, so that the
 * emitted XML stays well formed in both cases.
 * If the file cannot be opened, nothing is appended.
 */
void tcpdemux::write_to_file(std::stringstream &xmlattr,
                             const std::string &fname,
                             const sbuf_t &sbuf)
{
    int fd = retrying_open(fname,O_WRONLY|O_CREAT|O_BINARY|O_TRUNC,0644);
    if(fd>=0) {
        size_t count = sbuf.write(fd,0,sbuf.size());
        /* close() is always attempted first so the descriptor is never leaked */
        if(close(fd)!=0 || count!=sbuf.size()) {
            xmlattr << "<write_error errno='" << errno << "' buflen='" << sbuf.size() << "' count='" << count << "'/>";
        } else {
            xmlattr << "<byte_run file_offset='" << sbuf.pos0.offset << "' len='" << sbuf.size() << "'>"
                    << "<filename>" << fname << "</filename>"
                    << "<hashdigest type='MD5'>" << sbuf.md5().hexdigest() << "</hashdigest>"
                    << "</byte_run>\n";
        }
    }
}
    /**
     * Parse a prefetch record from sbuf.
     *
     * Fields are read in order and stored in the members as they are
     * decoded. Parsing stops, leaving the members read so far in place,
     * when the version byte is unrecognized, the header size does not
     * match the version, a file or directory name fails
     * valid_full_path_name(), or an sbuf range exception is thrown.
     *
     * @param sbuf - buffer whose offset 0 is the start of the candidate record
     */
    prefetch_record_t(const sbuf_t &sbuf):prefetch_version(), header_size(0), execution_filename(),
        execution_counter(0), execution_time(0), volume_path_name(),
        volume_serial_number(0), volume_creation_time(0),
        files(), directories() {

        // read fields in order until done or range exception
        try {

            // get prefetch version identifier
            uint8_t prefetch_version_byte = sbuf.get8u(0);

            // set values based on prefetch version
            size_t execution_time_offset=0;
            size_t execution_counter_offset=0;
            if (prefetch_version_byte == 0x11) {
                prefetch_version = "Windows XP";
                execution_time_offset = 0x78;
                execution_counter_offset = 0x90;
            } else if (prefetch_version_byte == 0x17) {
                prefetch_version = "Windows Vista or Windows 7";
                execution_time_offset = 0x80;
                execution_counter_offset = 0x98;
            } else {
                // program error: don't create prefetch_record if this byte is invalid.
                // This was an assert(0), but let's just return
                return ;
            }

            // size in bytes of the whole prefetch file
            uint32_t prefetch_file_length = sbuf.get32u(0x0c);

            // get execution file filename
            wstring utf16_execution_filename;
            sbuf.getUTF16(0x10, utf16_execution_filename);
            execution_filename = safe_utf16to8(utf16_execution_filename);

            // get the offset to Section A
            uint32_t section_a_offset = sbuf.get32u(0x54);
            header_size = section_a_offset; // header is everything before section A

            // validate the offset since we know what it should be
            if ((prefetch_version_byte == 0x11 && header_size != 0x98)		// XP and 2003
                    || (prefetch_version_byte == 0x17 && header_size != 0xf0)) {	// Vista and 7
                // invalid so quit trying
                return;
            }

            // the offsets of these two fields were selected by version above
            execution_time = sbuf.get64u(execution_time_offset);
            execution_counter = sbuf.get32u(execution_counter_offset);

            // get the list of files from Section C
            uint32_t section_c_offset = sbuf.get32u(0x64);
            uint32_t section_c_length = sbuf.get32u(0x68);
            // presumably (sbuf + n) yields a view starting n bytes into sbuf -- TODO confirm
            sbuf_stream filename_stream(sbuf + section_c_offset);
            // tell() is compared against the section length, so it is taken
            // to be relative to the start of the stream
            while (filename_stream.tell() < section_c_length) {
                wstring utf16_filename;
                filename_stream.getUTF16(utf16_filename);
                string filename = safe_utf16to8(utf16_filename);
                if (!valid_full_path_name(filename)) return;
                files.push_back(filename);
            }

            // Process Section D
            uint32_t section_d_offset = sbuf.get32u(0x6c);

            // the volume name offset is stored relative to the start of Section D
            uint32_t volume_name_offset = sbuf.get32u(section_d_offset + 0x00);
            wstring utf16_volume_name;
            sbuf.getUTF16(section_d_offset+volume_name_offset, utf16_volume_name);
            volume_path_name = safe_utf16to8(utf16_volume_name);

            volume_creation_time = sbuf.get64i(section_d_offset+0x08);
            volume_serial_number = sbuf.get32u(section_d_offset+0x10);

            // subsection 2 offset is also relative to the start of Section D
            uint32_t section_d_2_offset = sbuf.get32u(section_d_offset + 0x1c);
            size_t   directory_offset = section_d_offset + section_d_2_offset;
            uint32_t num_directory_entries = sbuf.get32u(section_d_offset + 0x20);

            // get each of the directory entries from Section D subsection 2
            // NOTE(review): prefetch_file_length comes from the record itself, so
            // this bound is only as trustworthy as the data; reads past the
            // buffer are caught as range exceptions below.
            if (directory_offset > prefetch_file_length) {
                // the offset is out of range so don't get the list of directories
            } else {
                // calculate a rough maximum number of bytes for directory entries
                size_t upper_max = prefetch_file_length - directory_offset;

                sbuf_stream directory_stream = sbuf_stream(sbuf + directory_offset);

                for (uint32_t i=0; i<num_directory_entries; i++) {
                    // break if obviously out of range
                    if (directory_stream.tell() > upper_max) {
                        return;		// rest of data not good
                    }

                    // for directories, the first int16 is the directory name length.
                    // We read to \U0000 instead so we throw away the directory name length.
                    directory_stream.get16u();

                    // read the directory name
                    wstring utf16_directory_name;
                    directory_stream.getUTF16(utf16_directory_name);
                    string directory_name = safe_utf16to8(utf16_directory_name);
                    if (!valid_full_path_name(directory_name)) return;
                    directories.push_back(directory_name);
                }
            }
        } catch (sbuf_t::range_exception_t &e) {
            // no action, just return what was gleaned before range exception
        }
    }
예제 #8
0
/**
 * Carve the region sbuf[pos, pos+len) into a file below the output
 * directory and record the carved object in the feature file.
 *
 * @param sbuf   - the buffer to carve
 * @param pos    - offset in the buffer to carve
 * @param len    - how many bytes to carve (clamped to the end of the buffer)
 * @param ext    - extension appended to the carved file's name
 * @param hasher - to compute the hash of the carved object.
 * @return the name of the carved file, or an empty string when nothing
 *         was carved (recorder disabled, pos in the margin or outside
 *         the buffer, carve mode excludes this object, or the output
 *         directory/file could not be created).
 */
std::string feature_recorder::carve(const sbuf_t &sbuf,size_t pos,size_t len,
                                    const std::string &ext,
                                    const be13::hash_def &hasher)
{
    if(flags & FLAG_DISABLED) return std::string();           // disabled

    /* If we are in the margin, ignore; it will be processed again */
    if(pos >= sbuf.pagesize && pos < sbuf.bufsize){
        return std::string();
    }

    if(pos >= sbuf.bufsize){    /* Sanity checks */
        std::cerr << "*** carve: WRITE OUTSIDE BUFFER.  pos=" << pos << " sbuf=" << sbuf << "\n";
        return std::string();
    }

    /* Asked to carve beyond bufsize; bring it in (same policy as write_buf).
     * pos < bufsize is guaranteed here, so the subtraction cannot wrap.
     * The clamped len is what gets hashed, reported and written.
     */
    if(len > sbuf.bufsize - pos){
        len = sbuf.bufsize - pos;
    }

    /* Carve to a file depending on the carving mode.  The purpose
     * of CARVE_ENCODED is to allow us to carve JPEGs when they are
     * embedded in, say, GZIP files, but not carve JPEGs that are
     * bare.  The difficulty arises when you have a tool that can go
     * into, say, ZIP files. In this case, we don't want to carve
     * every ZIP file, just the (for example) XORed ZIP files. So the
     * ZIP carver doesn't carve every ZIP file, just the ZIP files
     * that are in HIBER files.  That is, we want to not carve a path
     * of ZIP-234234 but we do want to carve a path of
     * 1000-HIBER-33423-ZIP-2343.  This is implemented by having an
     * ignore_encoding. the ZIP carver sets it to ZIP so it won't
     * carve things that are just found in a ZIP file. This means that
     * it won't carve disembodied ZIP files found in unallocated
     * space. You might want to do that.  If so, set ZIP's carve mode
     * to CARVE_ALL.
     */
    switch(carve_mode){
    case CARVE_NONE:
        return std::string();                         // carve nothing
    case CARVE_ENCODED:
        if(sbuf.pos0.path.size()==0) return std::string(); // not encoded
        if(sbuf.pos0.alphaPart()==ignore_encoding) return std::string(); // ignore if it is just encoded with this
        break;                                      // otherwise carve
    case CARVE_ALL:
        break;
    }

    /* Allocate a file number. Both variants yield the value AFTER the
     * increment so that builds with and without the atomic builtin
     * place files in the same numbered subdirectories.
     */
#ifdef HAVE___SYNC_ADD_AND_FETCH
    uint64_t this_file_number = __sync_add_and_fetch(&file_number,1);
#else
    uint64_t this_file_number = 0;
    {
        cppmutex::lock lock(Mf);
        this_file_number = ++file_number;
    }
#endif

    /* Carved files go in <outdir>/<name>/<NNN>/, with 1000 file numbers per NNN */
    std::string dirname1 = outdir + "/" + name;
    std::stringstream ss;

    ss << dirname1 << "/" << std::setw(3) << std::setfill('0') << (this_file_number / 1000);

    std::string dirname2 = ss.str();

    /* Name the file after the position of the carved object itself, not
     * the start of the enclosing sbuf; otherwise two objects carved from
     * the same sbuf with the same extension would share one file name.
     */
    std::string fname    = dirname2 + std::string("/") + valid_dosname((sbuf.pos0+pos).str() + ext);

    /* Hash only the carved bytes, not the whole sbuf */
    std::string carved_hash_hexvalue = (*hasher.func)(sbuf.buf+pos,len);

    /* Record what was found in the feature file, at the position where
     * the carved object starts.
     */
    ss.str(std::string()); // clear the stringstream
    ss << "<fileobject><filename>" << fname << "</filename><filesize>" << len << "</filesize>"
       << "<hashdigest type='" << hasher.name << "'>" << carved_hash_hexvalue << "</hashdigest></fileobject>";
    this->write(sbuf.pos0+pos,fname,ss.str());

    /* Make the directory if it doesn't exist.
     * If two threads try to make the directory,
     * that's okay, because the second one will fail.
     */
    if (access(dirname2.c_str(),R_OK)!=0){
#ifdef WIN32
        mkdir(dirname1.c_str());
        mkdir(dirname2.c_str());
#else
        mkdir(dirname1.c_str(),0777);
        mkdir(dirname2.c_str(),0777);
#endif
    }
    /* Check to make sure that directory is there. We don't just check the return code
     * because there could have been two attempts to make the directory simultaneously,
     * so the mkdir could fail but the directory could nevertheless exist. We need to
     * remember the error number because the access() call may clear it.
     */
    int oerrno = errno;                 // remember error number
    if (access(dirname2.c_str(),R_OK)!=0){
        std::cerr << "Could not make directory " << dirname2 << ": " << strerror(oerrno) << "\n";
        return std::string();
    }

    /* Write the file into the directory */
    int fd = ::open(fname.c_str(),O_CREAT|O_BINARY|O_RDWR,0666);
    if(fd<0){
        std::cerr << "*** carve: Cannot create " << fname << ": " << strerror(errno) << "\n";
        return std::string();
    }

    ssize_t ret = sbuf.write(fd,pos,len);
    if(ret<0){
        std::cerr << "*** carve: Cannot write(fd=" << fd << " pos=" << pos << " len=" << len << "): "<< strerror(errno) << "\n";
    }
    ::close(fd);
    return fname;
}