/**
 * Render the EXIF LONG value(s) of an IFD entry as a string.
 *
 * When the entry holds exactly one LONG it is printed as a decimal number
 * (an empty string is returned if the value lies outside the buffer).
 * Otherwise the LONGs are handed to get_possible_utf32() and its result is
 * returned.
 *
 * Each call adds the size of the entry (count * 4 bytes) to
 * tiff_handle.bytes_read before validating it.
 *
 * @param tiff_handle       parsing state; its bytes_read counter is advanced
 * @param ifd_entry_offset  offset of the IFD entry being decoded
 * @return the textual form of the entry
 * @throws exif_failure_exception_t if the exif data state is determined to be
 *         invalid such that further entry parsing would be invalid
 */
static string get_exif_long(tiff_handle_t &tiff_handle, uint32_t ifd_entry_offset) {
    const uint32_t count = get_entry_count(tiff_handle, ifd_entry_offset);
    const uint32_t offset = get_data_offset(tiff_handle, ifd_entry_offset);

    // Abort on count overflow.
    // exif standard: 1 exif long is 4 bytes long.
    // count * 4 can wrap in 32 bits for huge counts, but any count that large
    // is rejected by the count >= 0x4000 test below, so the wrap is harmless.
    tiff_handle.bytes_read += count * 4;
    if (count >= 0x4000 || tiff_handle.bytes_read >= 0x10000) {
        throw exif_failure_exception_t();
    }

    if (count == 1) {
        // count is 1 so print the long directly
        stringstream ss;
        try {
            ss << static_cast<uint32_t>(tiff_handle.sbuf->get32u(offset));
        } catch (const sbuf_t::range_exception_t &) {
            // value lies outside the buffer: add nothing to ss
        }
        return ss.str();
    }

    // count is not 1 so print the uint32_t bytes as utf8
    std::string s = get_possible_utf32(*(tiff_handle.sbuf)+offset, count, tiff_handle.byte_order);
#ifdef DEBUG
    cout << "exif_entry.get_exif_long (escaped): '" << validateOrEscapeUTF8(s, true, true) << "'\n";
#endif
    return s;
}
int main(int argc,char **argv) { std::cout << "Unicode Escape Regression Tester\n"; int ch; while ((ch = getopt(argc,argv,"r:h")) != -1){ switch(ch) { case 'r': testfile(optarg); break; } } const char buf[] = {0xef, 0xbe, 0xad, 0x5c}; check(std::string(buf,1),true); check(std::string(buf,2),true); check(std::string(buf,3),true); check(std::string(buf,4),true); /* Runs 16 copies simultaneously... */ uint32_t max=0xFFFFFFFF; // 2^32-1 for(uint64_t prefix=0;prefix<max;prefix+=0x10000000){ pid_t child = fork(); if(child==0){ /* Try all 4-byte sequences in the prefix range...*/ for(uint32_t k=0;k<=0x0FFFFFFF;k++){ uint32_t i=prefix+k; std::string ugly((char *)&i,4); check(ugly,false); if((i & 0x00FFFFFF)==0x00FFFFFF){ printf("pid=%d prefix=%x i=%x\n",getpid(),(uint32_t)prefix,(uint32_t)i); fflush(stdout); } } exit(0); } printf("Launched PID %d\n",child); fflush(stdout); } for(int i=0;i<16;i++){ int s=0; pid_t p = wait(&s); printf("pid %d finished with exit code %d\n",p,s); } std::cout << "done\n"; exit(1); /* Generic fuzzing. Try random attempts */ std::string line; while(getline(std::cin,line)){ std::cout << validateOrEscapeUTF8(line,true) << "\n"; } }
void testfile(const char *fn) { debug |= DEBUG_PEDANTIC; std::cout << "testing file " << fn << "\n"; ifstream i(fn); if(i.is_open()){ string line; getline(i,line); std::cout << "line length: " << line.size() << "\n"; std::cout << "calling ValidateOrEscapeUTF8 to escape...\n"; string l2 = validateOrEscapeUTF8(line,true); std::cout << " length l2: " << l2.size() << "\n"; std::cout << "calling ValidateOrEscapeUTF8 to validate...\n"; validateOrEscapeUTF8(l2,false); std::cout << "calling check...\n"; check(l2,false); } std::cout << "done\n"; exit(0); }
int main(int argc,char **argv) { /* Try all 4-byte sequences */ for(uint32_t i=0;i<=0xFFFFFFFF;i++){ std::string ugly((char *)&i,4); std::string res = validateOrEscapeUTF8(ugly); std::wstring utf16; /* Now check to make sure it is valid UTF8 */ try { utf8::utf8to16(res.begin(),res.end(),std::back_inserter(utf16)); } catch(utf8::exception){ printf("utf8 error i=%d hex sequence: ",i); for(size_t j=0;j<ugly.size();j++){ printf("%02x ",(unsigned char)ugly[j]); } printf(" encoded as: "); for(size_t j=0;j<res.size();j++){ printf("%02x ",(unsigned char)res[j]); } printf("\n"); } catch(std::exception){ std::cout << "other exception on i=" << i << "\n"; } if(i%1000000==0) std::cout << "i=" << i << "\n"; } std::cout << "done\n"; exit(1); /* Generic fuzzing. Try random attempts */ std::string line; while(getline(std::cin,line)){ std::cout << validateOrEscapeUTF8(line) << "\n"; } }
/** * Read the requested number of UTF-8 format string octets including any \0. */ void sbuf_t::getUTF8WithQuoting(size_t i, size_t num_octets_requested, std::string &utf8_string) const { // clear any residual value utf8_string = ""; if(i>=bufsize) { // past EOF return; } if(i+num_octets_requested>bufsize) { // clip at EOF num_octets_requested = bufsize - i; } utf8_string = std::string((const char *)buf+i,num_octets_requested); // validate or escape utf8_string utf8_string = validateOrEscapeUTF8(utf8_string, true, true); }
/**
 * Emit the record for this content object.
 *
 * - When opt_magic is set, filemagic() is passed through validateOrEscapeUTF8
 *   and reported with file_info() under the name "libmagic".
 * - When segs is non-empty, one <byte_run .../> XML element is formatted per
 *   segment and the concatenation is reported with file_info_xml() under
 *   "byte_runs".  The attributes written depend on which TSK_FS_BLOCK_FLAG_*
 *   bit is set in the segment's flags (SPARSE, RAW, COMP, RES, otherwise the
 *   raw flag value is written as unknown_flags).
 * - Unless `invalid` is set, the MD5 and SHA1 digests are reported when the
 *   corresponding option is enabled and at least one byte was hashed.
 *
 * NOTE(review): every element is formatted with sprintf into a fixed
 * 1024-byte stack buffer; the formats contain only fixed text and integer
 * conversions.
 * NOTE(review): as it appears here the braces are unbalanced (one closing
 * brace short), which places the digest block inside the segs.size()>0
 * branch -- confirm the intended nesting against the full file.
 */
void content::write_record() { if(opt_magic) { file_info("libmagic",validateOrEscapeUTF8(this->filemagic())); } if(this->segs.size()>0){ string runs = ""; for(seglist::const_iterator i = this->segs.begin();i!=this->segs.end();i++){ char buf[1024]; if(i->flags & TSK_FS_BLOCK_FLAG_SPARSE){ sprintf(buf," <byte_run file_offset='%"PRIu64"' fill='0' len='%"PRIu64"'/>\n", i->file_offset,i->len); } else if (i->flags & TSK_FS_BLOCK_FLAG_RAW){ sprintf(buf, " <byte_run file_offset='%"PRIu64"' fs_offset='%"PRIu64"' " "img_offset='%"PRIu64"' len='%"PRIu64"'/>\n", i->file_offset,i->fs_offset,i->img_offset,i->len); } else if (i->flags & TSK_FS_BLOCK_FLAG_COMP){ if(i->fs_offset){ sprintf(buf, " <byte_run file_offset='%"PRIu64"' fs_offset='%"PRIu64"' " "img_offset='%"PRIu64"' uncompressed_len='%"PRIu64"'/>\n", i->file_offset,i->fs_offset,i->img_offset,i->len); } else { sprintf(buf, " <byte_run file_offset='%"PRIu64"' uncompressed_len='%"PRIu64"'/>\n", i->file_offset,i->len); } } else if (i->flags & TSK_FS_BLOCK_FLAG_RES){ sprintf(buf, " <byte_run file_offset='%"PRIu64"' fs_offset='%"PRIu64"' " "img_offset='%"PRIu64"' len='%"PRIu64"' type='resident'/>\n", i->file_offset,i->fs_offset,i->img_offset,i->len); } else{ sprintf(buf," <byte_run file_offset='%"PRIu64"' unknown_flags='%d'/>\n",i->file_offset,i->flags); } runs += buf; } file_info_xml("byte_runs",runs); if(!invalid){ if(opt_md5 && h_md5.hashed_bytes>0) file_info(h_md5.final()); if(opt_sha1 && h_sha1.hashed_bytes>0) file_info(h_sha1.final()); } }
// read utf16 and return unvalidated utf8 string get_possible_utf16(const sbuf_t sbuf, size_t count, sbuf_t::byte_order_t byte_order) { // check for sequence if (chars_match(sbuf, count) || char_pairs_match(sbuf, count)) { // this is an uninteresting sequence, so return "" return ""; } // get wstring accounting for byte order wstring wstr; sbuf.getUTF16(0, count, byte_order, wstr); // convert wstring to string string utf8_string = all_utf16to8(wstr); #ifdef DEBUG cout << "exif_entry.get_possible_utf16 utf8_string (escaped): '" << validateOrEscapeUTF8(utf8_string, true, true) << "'\n"; #endif return utf8_string; }
/** * Read UTF-8 format code octets into string up to but not including \0. */ void sbuf_t::getUTF8WithQuoting(size_t i, std::string &utf8_string) const { // clear any residual value utf8_string = ""; // read octets for (size_t off=i; off<bufsize; off++) { uint8_t octet = get8u(off); // stop before \0 if (octet == 0) { // at \0 break; } // accept the octet utf8_string.push_back(octet); } // validate or escape utf8_string utf8_string = validateOrEscapeUTF8(utf8_string, true, true); }
void check(const std::string &ugly,bool verbose) { std::string res = validateOrEscapeUTF8(ugly,true); std::wstring utf16; /* Now check to make sure it is valid UTF8 */ try { utf8::utf8to16(res.begin(),res.end(),std::back_inserter(utf16)); if(verbose){ show(ugly); printf(" successfully encodes as "); show(res); printf(" (\"%s\")\n",res.c_str()); } } catch(utf8::exception){ printf("utf8 error hex sequence: "); show(ugly); printf(" encoded as: "); show(res); printf("\n"); } catch(std::exception){ std::cout << "other exception \n"; } }
void feature_recorder::write(const pos0_t &pos0,const string &feature_,const string &context_) { if(flags & FLAG_DISABLED) return; // disabled if(debug & DEBUG_PEDANTIC){ if(feature_.size() > opt_max_feature_size){ std::cerr << "feature_recorder::write : feature_.size()=" << feature_.size() << "\n"; assert(0); } if(context_.size() > opt_max_context_size){ std::cerr << "feature_recorder::write : context_.size()=" << context_.size() << "\n"; assert(0); } } /* By default quote string that is not UTF-8, and quote backslashes. */ bool escape_bad_utf8 = true; bool escape_backslash = true; if(flags & FLAG_NO_QUOTE){ // don't quote either escape_bad_utf8 = false; escape_backslash = false; } if(flags & FLAG_XML){ // only quote bad utf8 escape_bad_utf8 = true; escape_backslash = false; } string feature = validateOrEscapeUTF8(feature_, escape_bad_utf8,escape_backslash); string context; if((flags & FLAG_NO_CONTEXT)==0){ context = validateOrEscapeUTF8(context_,escape_bad_utf8,escape_backslash); } if(feature.size() > opt_max_feature_size) feature = feature.substr(0,opt_max_feature_size); if(context.size() > opt_max_context_size) context = context.substr(0,opt_max_context_size); if(feature.size()==0){ cerr << "zero length feature at " << pos0 << "\n"; if(debug & DEBUG_PEDANTIC) assert(0); return; } if(debug & DEBUG_PEDANTIC){ /* Check for tabs or newlines in feature and and context */ for(size_t i=0;i<feature.size();i++){ if(feature[i]=='\t') assert(0); if(feature[i]=='\n') assert(0); if(feature[i]=='\r') assert(0); } for(size_t i=0;i<context.size();i++){ if(context[i]=='\t') assert(0); if(context[i]=='\n') assert(0); if(context[i]=='\r') assert(0); } } #ifdef USE_STOP_LIST /* First check to see if the feature is on the stop list. * Only do this if we have a stop_list_recorder (the stop list recorder itself * does not have a stop list recorder. If it did we would infinitely recurse. 
*/ if(((flags & FLAG_NO_STOPLIST)==0) && stop_list_recorder){ if(stop_list.check_feature_context(feature,context)){ stop_list_recorder->write(pos0,feature,context); return; } } #endif #ifdef USE_ALERT_LIST /* The alert list is a special features that are called out. * If we have one of those, write it to the redlist. */ if(((flags & FLAG_NO_ALERTLIST)==0) && alert_list.check_feature_context(feature,context)){ string alert_fn = outdir + "/ALERTS_found.txt"; cppmutex::lock lock(Mr); // notce we are locking the redlist ofstream rf(alert_fn.c_str(),ios_base::app); if(rf.is_open()){ rf << pos0.shift(feature_recorder::offset_add).str() << '\t' << feature << '\t' << "\n"; } } #endif /* Finally write out the feature and the context */ stringstream ss; ss << pos0.shift(feature_recorder::offset_add).str() << '\t' << feature; if(((flags & FLAG_NO_CONTEXT)==0) && (context.size()>0)) ss << '\t' << context; this->write(ss.str()); }