void feature_recorder::write_buf(const sbuf_t &sbuf,size_t pos,size_t len) { #ifdef DEBUG_SCANNER if(debug & DEBUG_SCANNER){ std::cerr << "*** write_buf " << name << " sbuf=" << sbuf << " pos=" << pos << " len=" << len << "\n"; // for debugging, print Imagine that when pos= the location where the crash is happening. // then set a breakpoint at std::cerr. if(pos==9999999){ std::cerr << "Imagine that\n"; } } #endif /* If we are in the margin, ignore; it will be processed again */ if(pos >= sbuf.pagesize && pos < sbuf.bufsize){ return; } if(pos >= sbuf.bufsize){ /* Sanity checks */ std::cerr << "*** write_buf: WRITE OUTSIDE BUFFER. " << " pos=" << pos << " sbuf=" << sbuf << "\n"; return; } /* Asked to write beyond bufsize; bring it in */ if(pos+len > sbuf.bufsize){ len = sbuf.bufsize - pos; } string feature = sbuf.substr(pos,len); string context; if((flags & FLAG_NO_CONTEXT)==0){ /* Context write; create a clean context */ size_t p0 = context_window_before < pos ? pos-context_window_before : 0; size_t p1 = pos+len+context_window_after; if(p1>sbuf.bufsize) p1 = sbuf.bufsize; assert(p0<=p1); context = sbuf.substr(p0,p1-p0); } this->write(sbuf.pos0+pos,feature,context); #ifdef DEBUG_SCANNER if(debug & DEBUG_SCANNER){ std::cerr << ".\n"; } #endif }
// read utf32 and return unvalidated utf8 string get_possible_utf32(const sbuf_t &sbuf, size_t count, sbuf_t::byte_order_t byte_order) { // check for sequence if (chars_match(sbuf, count) || char_pairs_match(sbuf, count)) { // this is an uninteresting sequence, so return "" return ""; } std::string utf8_string; std::back_insert_iterator<std::basic_string<char> > result = back_inserter(utf8_string); for (uint32_t i=0; i<count; i++) { try { uint32_t code_point; code_point = sbuf.get32u(i * 4, byte_order); try { result = utf8::append(code_point, result); } catch (utf8::invalid_code_point) { // invalid code point so put in the byte values directly, // disregarding endian convention utf8_string += (uint8_t)code_point; utf8_string += (uint8_t)code_point/0x100; utf8_string += (uint8_t)code_point/0x10000; utf8_string += (uint8_t)code_point/0x1000000; } } catch (sbuf_t::range_exception_t &e) { // at end of buffer break; } } return utf8_string; }
// read utf8 and return it unvalidated; uninteresting sequences yield "".
static string get_possible_utf8(const sbuf_t &sbuf, size_t count) {
    // repeated single characters or repeated character pairs are noise, not text
    const bool uninteresting = chars_match(sbuf, count) || char_pairs_match(sbuf, count);
    if (uninteresting) {
        // this is an uninteresting sequence, so return ""
        return "";
    }
    std::string quoted;
    sbuf.getUTF8WithQuoting(0, count, quoted);
    return quoted;
}
/**
 * Validate one FAT directory entry held in sbuf (sbuf must be exactly
 * sizeof(fatfs_dentry) bytes).
 *
 * Return 0 if the directory is invalid
 *        1 if the directory is valid dentry
 *        2 if the directory is valid LFN
 *        10 if the directory is valid and it's the last in the sector.
 *        20 if all null, so there are no more valid
 *
 * http://en.wikipedia.org/wiki/File_Allocation_Table
 */
int valid_fat_directory_entry(const sbuf_t &sbuf)
{
    if(sbuf.bufsize != sizeof(fatfs_dentry)) return 0; // not big enough
    /* If the entire directory entry is the same character, it's not valid.
     * NOTE(review): this returns 20 ("no more valid") for ANY constant fill,
     * not only an all-zero entry — confirm that is intended. */
    if(sbuf.is_constant(sbuf[0])) return 20; // clearly not valid
    const fatfs_dentry &dentry = *(sbuf.get_struct_ptr<fatfs_dentry>(0));
    if((dentry.attrib & ~FATFS_ATTR_ALL) != 0) return 0; // invalid attribute bit set
    if(dentry.attrib == FATFS_ATTR_LFN) {
        /* This may be a VFAT long file name */
        const fatfs_dentry_lfn &lfn = *(const fatfs_dentry_lfn *)sbuf.buf;
        if((lfn.seq & ~0x40) > 10) return 0;     // invalid sequence number (0x40 = last-entry flag)
        if(lfn.reserved1 != 0) return 0;         // invalid reserved1 (LDIR_Type)
        if(fat16int(lfn.reserved2)!=0) return 0; // LDIR_FstClusLO "Must be ZERO"
        return 2;                                // looks okay
    } else {
        if(dentry.name[0]==0) return 10; // "Entry is available and no subsequent entry is in use."
        /* Look for combinations of times, dates and attributes that have been invalid */
        /* All LFN bits set but attrib is not exactly LFN (already excluded above) */
        if((dentry.attrib & FATFS_ATTR_LFN)==FATFS_ATTR_LFN && (dentry.attrib != FATFS_ATTR_LFN)) {
            return 0;  // LFN set but DIR or ARCHIVE is also set
        }
        if((dentry.attrib & FATFS_ATTR_DIRECTORY) && (dentry.attrib & FATFS_ATTR_ARCHIVE)) {
            return 0;  // can't have both DIRECTORY and ARCHIVE set
        }
        if(!valid_fat_dentry_name(dentry.name,dentry.ext)) return 0; // invalid name
        if(dentry.ctimeten>199) return 0; // create time fine resolution, 0..199

        /* Decode the little-endian 16-bit time/date fields once */
        uint16_t ctime = fat16int(dentry.ctime);
        uint16_t cdate = fat16int(dentry.cdate);
        uint16_t adate = fat16int(dentry.adate);
        uint16_t wtime = fat16int(dentry.wtime);
        uint16_t wdate = fat16int(dentry.wdate);

        /* A zero value is allowed (fields may be null); a nonzero value must decode */
        if(ctime && !FATFS_ISTIME(ctime)) return 0; // ctime is null for directories
        if(cdate && !FATFS_ISDATE(cdate)) return 0; // cdate is null for directories
        if(adate && !FATFS_ISDATE(adate)) return 0; // adate is null for directories
        if(adate==0 && ctime==0 && cdate==0) {
            /* No create/access stamps at all: only plausible as a volume label */
            if(dentry.attrib & FATFS_ATTR_VOLUME) return 1; // volume name
            return 0;                                       // not a volume name
        }
        if(!FATFS_ISTIME(wtime)) return 0;        // invalid wtime
        if(!FATFS_ISDATE(wdate)) return 0;        // invalid wdate
        /* Heuristics: identical raw time/date words are statistically implausible */
        if(ctime && ctime==cdate) return 0;       // highly unlikely
        if(wtime && wtime==wdate) return 0;       // highly unlikely
        if(adate && adate==ctime) return 0;       // highly unlikely
        if(adate && adate==wtime) return 0;       // highly unlikely
    }
    return 1;
}
// read utf16 and return unvalidated utf8 string get_possible_utf16(const sbuf_t sbuf, size_t count, sbuf_t::byte_order_t byte_order) { // check for sequence if (chars_match(sbuf, count) || char_pairs_match(sbuf, count)) { // this is an uninteresting sequence, so return "" return ""; } // get wstring accounting for byte order wstring wstr; sbuf.getUTF16(0, count, byte_order, wstr); // convert wstring to string string utf8_string = all_utf16to8(wstr); #ifdef DEBUG cout << "exif_entry.get_possible_utf16 utf8_string (escaped): '" << validateOrEscapeUTF8(utf8_string, true, true) << "'\n"; #endif return utf8_string; }
/* write a portion of memory to the disk and a child fileobject. */ void tcpdemux::write_to_file(std::stringstream &xmlattr, const std::string &fname, const sbuf_t &sbuf) { int fd = retrying_open(fname,O_WRONLY|O_CREAT|O_BINARY|O_TRUNC,0644); if(fd>=0) { size_t count = sbuf.write(fd,0,sbuf.size()); if(close(fd)!=0 || count!=sbuf.size()) { xmlattr << "<write_error errno='" << errno << "' buflen='" << sbuf.size() << "' count='" << count << "'>"; } else { xmlattr << "<byte_run file_offset='" << sbuf.pos0.offset << "' len='" << sbuf.size() << "'>" << "<filename>" << fname << "</filename>" << "<hashdigest type='MD5'>" << sbuf.md5().hexdigest() << "</hashdigest>" << "</byte_run>\n"; } } }
/**
 * Parse a Windows Prefetch file from sbuf, filling in this record's fields.
 * Parsing is best-effort: it stops (leaving whatever was gleaned so far) on
 * an unknown version byte, an implausible header size, an invalid path name,
 * or any read past the end of the buffer.
 */
prefetch_record_t(const sbuf_t &sbuf):prefetch_version(), header_size(0),
            execution_filename(), execution_counter(0), execution_time(0),
            volume_path_name(), volume_serial_number(0), volume_creation_time(0),
            files(), directories() {

    // read fields in order until done or range exception
    try {
        // get prefetch version identifier (first byte of the file)
        uint8_t prefetch_version_byte = sbuf.get8u(0);

        // set values based on prefetch version; the field offsets differ per version
        size_t execution_time_offset=0;
        size_t execution_counter_offset=0;
        if (prefetch_version_byte == 0x11) {
            prefetch_version = "Windows XP";
            execution_time_offset = 0x78;
            execution_counter_offset = 0x90;
        } else if (prefetch_version_byte == 0x17) {
            prefetch_version = "Windows Vista or Windows 7";
            execution_time_offset = 0x80;
            execution_counter_offset = 0x98;
        } else {
            // program error: don't create prefetch_record if this byte is invalid.
            // This was an assert(0), but let's just return
            return ;
        }

        // size in bytes of the whole prefetch file
        uint32_t prefetch_file_length = sbuf.get32u(0x0c);

        // get execution file filename (UTF-16 at fixed offset 0x10)
        wstring utf16_execution_filename;
        sbuf.getUTF16(0x10, utf16_execution_filename);
        execution_filename = safe_utf16to8(utf16_execution_filename);

        // get the offset to Section A
        uint32_t section_a_offset = sbuf.get32u(0x54);
        header_size = section_a_offset; // header is everything before section A

        // validate the offset since we know what it should be for each version
        if ((prefetch_version_byte == 0x11 && header_size != 0x98)     // XP and 2003
         || (prefetch_version_byte == 0x17 && header_size != 0xf0)) {  // Vista and 7
            // invalid so quit trying
            return;
        }

        execution_time = sbuf.get64u(execution_time_offset);
        execution_counter = sbuf.get32u(execution_counter_offset);

        // get the list of files from Section C: a run of NUL-terminated
        // UTF-16 path strings, section_c_length bytes long
        uint32_t section_c_offset = sbuf.get32u(0x64);
        uint32_t section_c_length = sbuf.get32u(0x68);
        sbuf_stream filename_stream(sbuf + section_c_offset);
        while (filename_stream.tell() < section_c_length) {
            wstring utf16_filename;
            filename_stream.getUTF16(utf16_filename);
            string filename = safe_utf16to8(utf16_filename);
            // an invalid path means we are reading garbage; stop entirely
            if (!valid_full_path_name(filename)) return;
            files.push_back(filename);
        }

        // Process Section D (volume information)
        uint32_t section_d_offset = sbuf.get32u(0x6c);

        // volume name: UTF-16 string at an offset relative to section D
        uint32_t volume_name_offset = sbuf.get32u(section_d_offset + 0x00);
        wstring utf16_volume_name;
        sbuf.getUTF16(section_d_offset+volume_name_offset, utf16_volume_name);
        volume_path_name = safe_utf16to8(utf16_volume_name);

        volume_creation_time = sbuf.get64i(section_d_offset+0x08);
        volume_serial_number = sbuf.get32u(section_d_offset+0x10);

        // Section D subsection 2 holds the directory entries
        uint32_t section_d_2_offset = sbuf.get32u(section_d_offset + 0x1c);
        size_t directory_offset = section_d_offset + section_d_2_offset;
        uint32_t num_directory_entries = sbuf.get32u(section_d_offset + 0x20);

        // get each of the directory entries from Section D subsection 2
        if (directory_offset > prefetch_file_length) {
            // the offset is out of range so don't get the list of directories
        } else {
            // calculate a rough maximum number of bytes for directory entries
            size_t upper_max = prefetch_file_length - directory_offset;

            sbuf_stream directory_stream = sbuf_stream(sbuf + directory_offset);
            for (uint32_t i=0; i<num_directory_entries; i++) {
                // break if obviously out of range
                if (directory_stream.tell() > upper_max) {
                    return; // rest of data not good
                }
                // for directories, the first int16 is the directory name length.
                // We read to \U0000 instead so we throw away the directory name length.
                directory_stream.get16u();

                // read the directory name
                wstring utf16_directory_name;
                directory_stream.getUTF16(utf16_directory_name);
                string directory_name = safe_utf16to8(utf16_directory_name);
                if (!valid_full_path_name(directory_name)) return;
                directories.push_back(directory_name);
            }
        }
    } catch (sbuf_t::range_exception_t &e) {
        // no action, just return what was gleaned before range exception
    }
}
/**
 * Carve len bytes at offset pos of sbuf into a file on disk and record a
 * fileobject entry in the feature file.
 *
 * @param sbuf - the buffer to carve
 * @param pos - offset in the buffer to carve
 * @param len - how many bytes to carve
 * @param ext - filename extension appended to the carved file's name
 * @param hasher - to compute the hash of the carved object.
 * @return the carved file's name, or "" if nothing was carved.
 */
std::string feature_recorder::carve(const sbuf_t &sbuf,size_t pos,size_t len,
                                    const std::string &ext,
                                    const be13::hash_def &hasher)
{
    if(flags & FLAG_DISABLED) return std::string();     // disabled

    /* If we are in the margin, ignore; it will be processed again */
    if(pos >= sbuf.pagesize && pos < sbuf.bufsize){
        return std::string();
    }

    if(pos >= sbuf.bufsize){    /* Sanity checks */
        cerr << "*** carve: WRITE OUTSIDE BUFFER. pos=" << pos
             << " sbuf=" << sbuf << "\n";
        return std::string();
    }

    /* Carve to a file depending on the carving mode. The purpose
     * of CARVE_ENCODED is to allow us to carve JPEGs when they are
     * embedded in, say, GZIP files, but not carve JPEGs that are
     * bare. The difficulty arises when you have a tool that can go
     * into, say, ZIP files. In this case, we don't want to carve
     * every ZIP file, just the (for example) XORed ZIP files. So the
     * ZIP carver doesn't carve every ZIP file, just the ZIP files
     * that are in HIBER files. That is, we want to not carve a path
     * of ZIP-234234 but we do want to carve a path of
     * 1000-HIBER-33423-ZIP-2343. This is implemented by having an
     * ignore_encoding. the ZIP carver sets it to ZIP so it won't
     * carve things that are just found in a ZIP file. This means that
     * it won't carve disembodied ZIP files found in unallocated
     * space. You might want to do that. If so, set ZIP's carve mode
     * to CARVE_ALL.
     */
    switch(carve_mode){
    case CARVE_NONE:
        return std::string();                         // carve nothing
    case CARVE_ENCODED:
        if(sbuf.pos0.path.size()==0) return std::string(); // not encoded
        if(sbuf.pos0.alphaPart()==ignore_encoding) return std::string(); // ignore if it is just encoded with this
        break;                                        // otherwise carve
    case CARVE_ALL:
        break;
    }

    /* If the directory doesn't exist, make it.
     * If two threads try to make the directory,
     * that's okay, because the second one will fail.
     */

    /* Atomically allocate the next file number; fall back to a mutex
     * when the GCC atomic builtin is unavailable. */
#ifdef HAVE___SYNC_ADD_AND_FETCH
    uint64_t this_file_number = __sync_add_and_fetch(&file_number,1);
#else
    uint64_t this_file_number = 0;
    {
        cppmutex::lock lock(Mf);
        this_file_number = file_number++;
    }
#endif

    /* Carved files are bucketed 1000 per zero-padded subdirectory:
     * <outdir>/<name>/NNN/<dosname> */
    std::string dirname1 = outdir + "/" + name;
    std::stringstream ss;
    ss << dirname1 << "/" << std::setw(3) << std::setfill('0') << (this_file_number / 1000);
    std::string dirname2 = ss.str();
    std::string fname = dirname2 + std::string("/") + valid_dosname(sbuf.pos0.str() + ext);

    /* NOTE(review): the hash is computed over the ENTIRE sbuf
     * (sbuf.buf, sbuf.bufsize), not just the carved region
     * [pos, pos+len) that is written to disk — confirm intended. */
    std::string carved_hash_hexvalue = (*hasher.func)(sbuf.buf,sbuf.bufsize);

    /* Record what was found in the feature file. */
    ss.str(std::string()); // clear the stringstream
    ss << "<fileobject><filename>" << fname << "</filename><filesize>" << len << "</filesize>"
       << "<hashdigest type='" << hasher.name << "'>" << carved_hash_hexvalue << "</hashdigest></fileobject>";
    /* NOTE(review): the feature is recorded at sbuf.pos0+len; recording at
     * sbuf.pos0+pos (the start of the carved object) would seem more
     * natural — confirm which is intended. */
    this->write(sbuf.pos0+len,fname,ss.str());

    /* Make the directory if it doesn't exist.  */
    if (access(dirname2.c_str(),R_OK)!=0){
#ifdef WIN32
        mkdir(dirname1.c_str());
        mkdir(dirname2.c_str());
#else
        mkdir(dirname1.c_str(),0777);
        mkdir(dirname2.c_str(),0777);
#endif
    }
    /* Check to make sure that directory is there. We don't just use the return code
     * because there could have been two attempts to make the directory simultaneously,
     * so the mkdir could fail but the directory could nevertheless exist. We need to
     * remember the error number because the access() call may clear it.
     */
    int oerrno = errno;                 // remember error number from mkdir
    if (access(dirname2.c_str(),R_OK)!=0){
        cerr << "Could not make directory " << dirname2 << ": " << strerror(oerrno) << "\n";
        return std::string();
    }

    /* Write the file into the directory */
    int fd = ::open(fname.c_str(),O_CREAT|O_BINARY|O_RDWR,0666);
    if(fd<0){
        cerr << "*** carve: Cannot create " << fname << ": " << strerror(errno) << "\n";
        return std::string();
    }

    ssize_t ret = sbuf.write(fd,pos,len);
    if(ret<0){
        cerr << "*** carve: Cannot write(pos=" << fd << "," << pos
             << " len=" << len << "): "<< strerror(errno) << "\n";
    }
    ::close(fd);
    return fname;
}