/*
 * Header check for a Perl module, with a fallback to Java source.
 *
 * The candidate is accepted only if a ';' is met before any '\n' while
 * scanning from the start of the buffer (positions 0..128 are considered).
 * The first 2048 bytes (or the whole buffer if smaller) are then searched
 * for "class", "private static" or "public interface": if one of them is
 * present the file is treated as Java source, otherwise as a Perl module.
 *
 * buffer             data to inspect
 * buffer_size        number of valid bytes in buffer
 * safe_header_only   unused by this check
 * file_recovery      unused by this check
 * file_recovery_new  reset and filled in when the header is accepted
 *
 * Returns 1 when the header is accepted, 0 otherwise.
 */
static int header_check_perlm(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  unsigned int i;
  const unsigned int buffer_size_test=(buffer_size < 2048 ? buffer_size : 2048);
  /* Stop on the first ';' or end of line, without running past the buffer */
  for(i=0; i<128 && i<buffer_size_test && buffer[i]!=';' && buffer[i]!='\n'; i++);
  if(i>=buffer_size_test || buffer[i]!=';')
    return 0;
  reset_file_recovery(file_recovery_new);
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  if( td_memmem(buffer, buffer_size_test, "class", 5)!=NULL ||
      td_memmem(buffer, buffer_size_test, "private static", 14)!=NULL ||
      td_memmem(buffer, buffer_size_test, "public interface", 16)!=NULL)
  {
    /* source code in java */
#ifdef DJGPP
    file_recovery_new->extension="jav";
#else
    file_recovery_new->extension="java";
#endif
  }
  else
  {
    /* perl module */
    file_recovery_new->extension="pm";
  }
  return 1;
}
/*
 * Look for the EBML_size bytes of EBML_Header inside buffer using
 * td_memmem().
 *
 * Returns a pointer to the byte that follows the match, or NULL when
 * td_memmem() reports no match.
 */
static const unsigned char *EBML_find(const unsigned char *buffer, const unsigned int buffer_size, const unsigned char *EBML_Header, const unsigned int EBML_size)
{
  const unsigned char *match=(const unsigned char *)td_memmem(buffer, buffer_size, EBML_Header, EBML_size);
  return (match!=NULL ? match+EBML_size : NULL);
}
/*
 * Header check for .fob files based on the "NAVNL" and "NAVW" markers.
 *
 * The whole buffer is searched for both markers. When at least one is
 * found, file_recovery_new is reset, receives the fob extension and a
 * minimal file size equal to the largest offset at which a marker was
 * reported.
 *
 * safe_header_only and file_recovery are unused by this check.
 *
 * Returns 1 when a marker is present, 0 otherwise.
 */
static int header_check_fob(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  static const unsigned char sign_navnl[5] = {'N','A','V','N','L'};
  static const unsigned char sign_navw[4] = {'N','A','V','W'};
  const unsigned char *navnl_pos=(const unsigned char *)td_memmem(buffer, buffer_size, sign_navnl, sizeof(sign_navnl));
  const unsigned char *navw_pos=(const unsigned char *)td_memmem(buffer, buffer_size, sign_navw, sizeof(sign_navw));
  unsigned int last_offset;

  if(navnl_pos==NULL && navw_pos==NULL)
    return 0;

  /* Keep the offset of whichever marker sits furthest into the buffer */
  last_offset=(navnl_pos!=NULL ? navnl_pos-buffer : 0);
  if(navw_pos!=NULL && navw_pos-buffer > last_offset)
    last_offset=navw_pos-buffer;

  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension=file_hint_fob.extension;
  file_recovery_new->min_filesize=last_offset;
  return 1;
}
/*
 * Header check for .fob files based on an object-type magic plus a marker.
 *
 * The buffer must start with one of the magic_codeunit, magic_dataport,
 * magic_form, magic_menusuite, magic_report, magic_table or magic_xmlport
 * byte sequences, and must contain sign_navnl or sign_navw somewhere.
 * On success file_recovery_new is reset and receives the fob extension.
 *
 * safe_header_only and file_recovery are unused by this check.
 *
 * Returns 1 when both conditions hold, 0 otherwise.
 */
static int header_check_fob(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  const int starts_with_magic=
    memcmp(buffer, magic_codeunit, sizeof(magic_codeunit))==0 ||
    memcmp(buffer, magic_dataport, sizeof(magic_dataport))==0 ||
    memcmp(buffer, magic_form, sizeof(magic_form))==0 ||
    memcmp(buffer, magic_menusuite, sizeof(magic_menusuite))==0 ||
    memcmp(buffer, magic_report, sizeof(magic_report))==0 ||
    memcmp(buffer, magic_table, sizeof(magic_table))==0 ||
    memcmp(buffer, magic_xmlport, sizeof(magic_xmlport))==0;

  if(!starts_with_magic)
    return 0;
  /* The markers are only searched once the magic has matched */
  if(td_memmem(buffer, buffer_size, sign_navnl, sizeof(sign_navnl))==NULL &&
     td_memmem(buffer, buffer_size, sign_navw, sizeof(sign_navw))==NULL)
    return 0;

  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension=file_hint_fob.extension;
  return 1;
}
/*
 * Header check for PDF (and Adobe Illustrator) files.
 *
 * The buffer must start with pdf_header. If "<</Illustrator " is found in
 * the buffer the extension is "ai", otherwise the pdf extension is used and
 * file_rename_pdf is installed.
 *
 * When the word "Linearized" appears in the first 512 bytes, the text that
 * follows it is scanned up to the next '>' for a "/L" key; the decimal
 * number after that key is stored as calculated_file_size (in a linearized
 * PDF the /L entry gives the file length) and size based checks are
 * installed. Otherwise only file_check_pdf is installed.
 *
 * The scan never goes beyond min(buffer_size, 512) bytes.
 *
 * safe_header_only and file_recovery are unused by this check.
 *
 * Returns 1 when the buffer starts with pdf_header, 0 otherwise.
 */
static int header_check_pdf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  static const unsigned char sig_linearized[10]={'L','i','n','e','a','r','i','z','e','d'};
  const unsigned int buffer_size_test=(buffer_size < 512 ? buffer_size : 512);
  const unsigned char *buffer_end=buffer+buffer_size_test;
  const unsigned char *src;
  if(memcmp(buffer,pdf_header,sizeof(pdf_header))!=0)
    return 0;
  reset_file_recovery(file_recovery_new);
  if(td_memmem(buffer, buffer_size, "<</Illustrator ", 15) != NULL)
    file_recovery_new->extension="ai";
  else
  {
    file_recovery_new->extension=file_hint_pdf.extension;
    file_recovery_new->file_rename=&file_rename_pdf;
  }
  src=(const unsigned char *)td_memmem(buffer, buffer_size_test, sig_linearized, sizeof(sig_linearized));
  if(src!=NULL)
  {
    src+=sizeof(sig_linearized);
    /* Two bytes are examined per step, so at least two must remain */
    for(; buffer_end-src>1 && *src!='>'; src++)
    {
      if(*src=='/' && *(src+1)=='L')
      {
        src+=2;
        /* Skip the white space between the key and its value */
        while(src<buffer_end &&
            (*src==' ' || *src=='\t' || *src=='\n' || *src=='\r'))
          src++;
        file_recovery_new->calculated_file_size=0;
        while(src<buffer_end &&
            *src>='0' && *src<='9')
        {
          file_recovery_new->calculated_file_size=file_recovery_new->calculated_file_size*10+(*src)-'0';
          src++;
        }
        file_recovery_new->data_check=&data_check_size;
        file_recovery_new->file_check=&file_check_pdf_and_size;
        return 1;
      }
    }
  }
  file_recovery_new->file_check=&file_check_pdf;
  return 1;
}
/*
 * Header check for .snz text files.
 *
 * The first 512 bytes (or the whole buffer if smaller) must contain the
 * string ".snz". On success file_recovery_new is reset, set up as a text
 * file with the "snz" extension, and its minimal file size is the offset at
 * which ".snz" was found.
 *
 * safe_header_only and file_recovery are unused by this check.
 *
 * Returns 1 when ".snz" is found, 0 otherwise.
 */
static int header_check_snz(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  unsigned int search_len=512;
  const unsigned char *marker;

  if(buffer_size < 512)
    search_len=buffer_size;
  marker=(const unsigned char *)td_memmem(buffer, search_len, ".snz", 4);
  if(marker==NULL)
    return 0;

  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="snz";
  file_recovery_new->min_filesize=marker-buffer;
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  return 1;
}
/*
 * Header check for StereoLithography files in the STL Ascii format
 * (http://www.ennex.com/~fabbers/StL.asp).
 *
 * The first 512 bytes (or the whole buffer if smaller) must contain
 * "facet normal". On success file_recovery_new is reset and set up as a
 * text file with the "stl" extension.
 *
 * safe_header_only and file_recovery are unused by this check.
 *
 * Returns 1 when "facet normal" is found, 0 otherwise.
 */
static int header_check_stl(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  unsigned int search_len=512;

  if(buffer_size < 512)
    search_len=buffer_size;
  if(td_memmem(buffer, search_len, "facet normal", 12)!=NULL)
  {
    reset_file_recovery(file_recovery_new);
    file_recovery_new->extension="stl";
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    return 1;
  }
  return 0;
}
/*
 * Header check for OLE compound documents (doc, xls, ppt, ...).
 *
 * The buffer must start with doc_header and the OLE header fields must be
 * consistent (byte order mark, version 3 or 4 with the matching sector
 * shift and directory sector count, zeroed reserved fields, plausible FAT
 * block counts). Once accepted, file_recovery_new is reset, file_check_doc
 * and file_rename_doc are installed, and the extension is chosen:
 *  - from ole_get_file_extension() when it returns one, with "sda" and
 *    "wps" refined to "sdd" and "pub" if the corresponding marker is found;
 *  - otherwise from the first marker string found in the buffer;
 *  - otherwise the default doc extension.
 *
 * safe_header_only and file_recovery are unused by this check.
 *
 * Returns 1 when the header is accepted, 0 otherwise.
 */
static int header_check_doc(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  /* Markers tried in this order when ole_get_file_extension() has no
   * answer; the first one present in the buffer selects the extension. */
  static const struct
  {
    const char *marker;
    unsigned int length;
    const char *extension;
  } fallback[]=
  {
    { "WordDocument", 12, "doc" },
    { "StarDraw", 8, "sda" },
    { "StarCalc", 8, "sdc" },
    { "StarImpress", 11, "sdd" },
    { "Worksheet", 9, "xls" },
    { "Book", 4, "xls" },
    { "Workbook", 8, "xls" },
    { "Calc", 4, "xls" },
    { "Power", 5, "ppt" },
    { "AccessObjSiteData", 17, "mdb" },
    { "Visio", 5, "vsd" },
    { "SfxDocument", 11, "sdw" },
    /* Flash Project File */
    { "CPicPage", 8, "fla" },
    /* Publisher */
    { "Microsoft Publisher", 19, "pub" },
    /* Microsoft Works .wdb */
    { "Microsoft Works Database", 24, "wdb" },
    { "MSWorksDBDoc", 12, "wdb" },
    /* MetaStock */
    { "MetaStock", 9, "mws" }
  };
  const struct OLE_HDR *header=(const struct OLE_HDR *)buffer;
  unsigned int dll_version;
  unsigned int sector_shift;
  unsigned int idx;

  if(memcmp(buffer,doc_header,sizeof(doc_header))!=0)
    return 0;
  /* Check for Little Endian */
  if(le16(header->uByteOrder)!=0xFFFE)
    return 0;
  if(le16(header->reserved)!=0 || le32(header->reserved1)!=0)
    return 0;
  if(le16(header->uMiniSectorShift)!=6)
    return 0;

  dll_version=le16(header->uDllVersion);
  sector_shift=le16(header->uSectorShift);
  switch(dll_version)
  {
    case 3:
      if(sector_shift!=9 || le32(header->csectDir)!=0)
        return 0;
      break;
    case 4:
      /* max and qbb file have uSectorShift=12
       * max file have csectDir=1
       * qbb file have csectDir=4 */
      if(sector_shift!=12 || le32(header->csectDir)==0)
        return 0;
      break;
    default:
      return 0;
  }

  /*
     num_FAT_blocks=109+num_extra_FAT_blocks*(512-1);
     maximum file size is 512+(num_FAT_blocks*128)*512, about 1.6GB
     */
  if(le32(header->num_FAT_blocks)==0 ||
      le32(header->num_extra_FAT_blocks)>50 ||
      le32(header->num_FAT_blocks)>109+le32(header->num_extra_FAT_blocks)*((1<<sector_shift)-1))
    return 0;

  reset_file_recovery(file_recovery_new);
  file_recovery_new->file_check=&file_check_doc;
  file_recovery_new->file_rename=&file_rename_doc;
  file_recovery_new->extension=ole_get_file_extension(buffer, buffer_size);
  if(file_recovery_new->extension!=NULL)
  {
    if(strcmp(file_recovery_new->extension,"sda")==0)
    {
      if(td_memmem(buffer,buffer_size,"StarImpress",11)!=NULL)
        file_recovery_new->extension="sdd";
    }
    else if(strcmp(file_recovery_new->extension,"wps")==0)
    {
      /* Distinguish between MS Works .wps and MS Publisher .pub */
      if(td_memmem(buffer,buffer_size,"Microsoft Publisher",19)!=NULL)
        file_recovery_new->extension="pub";
    }
    return 1;
  }

  for(idx=0; idx<sizeof(fallback)/sizeof(fallback[0]); idx++)
  {
    if(td_memmem(buffer, buffer_size, fallback[idx].marker, fallback[idx].length)!=NULL)
    {
      file_recovery_new->extension=fallback[idx].extension;
      return 1;
    }
  }
  file_recovery_new->extension=file_hint_doc.extension;
  return 1;
}
/*
 * Header check for text based files.
 *
 * Stage 1 - signatures checked even when safe_header_only is set:
 *   Mac OSX mail (emlx), "@echo off" (bat), ASP vbscript (asp),
 *   Visual Basic (vb), vcard (vcf) and "#!" scripts (pl, py, rb).
 * Stage 2 - heuristics, skipped when safe_header_only is non-zero:
 *   the candidate is rejected depending on the file currently being
 *   recovered (file_recovery->file_stat), then the buffer is converted with
 *   UTF2Lat() into a static work buffer and inspected to choose between
 *   bat, ldif, inf, ini, php, csv, tex, c, jsp, asp, html, java, py, f, ly,
 *   h, json or the default txt extension.
 *
 * buffer             data to inspect
 * buffer_size        number of valid bytes in buffer; at most the first
 *                    2048 bytes are used by the heuristics
 * safe_header_only   when non-zero only stage 1 is performed
 * file_recovery      file currently being recovered
 * file_recovery_new  reset and filled in when the header is accepted
 *
 * The work buffer is static and kept between calls, so this function is not
 * reentrant. The string searches on it assume UTF2Lat() leaves it NUL
 * terminated - TODO confirm against UTF2Lat().
 *
 * Returns 1 when a text file is recognised, 0 otherwise.
 */
static int header_check_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  static char *buffer_lower=NULL;
  static unsigned int buffer_lower_size=0;
  /* Extension used for every Java detection below */
#ifdef DJGPP
  const char *const ext_java="jav";
#else
  const char *const ext_java="java";
#endif
  unsigned int l;
  const unsigned int buffer_size_test=(buffer_size < 2048 ? buffer_size : 2048);
  {
    /* Mac OSX mail: a decimal length on the first line, then the mail */
    unsigned int i;
    unsigned int tmp=0;
    for(i=0;i<10 && isdigit(buffer[i]);i++)
      tmp=tmp*10+buffer[i]-'0';
    if(buffer[i]==0x0a &&
      (memcmp(buffer+i+1, "Return-Path: ", 13)==0 ||
       memcmp(buffer+i+1, "Received: from", 14)==0) &&
      !(file_recovery->file_stat!=NULL &&
        file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
        strcmp(file_recovery->extension,"mbox")==0))
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->calculated_file_size=tmp+i+1;
      file_recovery_new->data_check=NULL;
      file_recovery_new->file_check=&file_check_emlx;
      /* Mac OSX mail */
      file_recovery_new->extension="emlx";
      return 1;
    }
  }
  if(strncasecmp((const char *)buffer, "@echo off", 9)==0)
  {
    if(buffer[9]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Dos/Windows batch */
    file_recovery_new->extension="bat";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "<%@ language=\"vbscript", 22)==0)
  {
    if(buffer[22]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Microsoft Active Server Pages */
    file_recovery_new->extension="asp";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "version 4.00\r\nbegin", 19)==0)
  {
    if(buffer[19]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Microsoft Visual Basic */
    file_recovery_new->extension="vb";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "begin:vcard", 11)==0)
  {
    if(buffer[11]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* vcard, electronic business cards */
    file_recovery_new->extension="vcf";
    return 1;
  }
  if(buffer_size_test>2 && buffer[0]=='#' && buffer[1]=='!')
  {
    /* Only the interpreter line is inspected: at most the first 512 bytes,
     * and never more than what the buffer holds */
    unsigned int ll=(buffer_size_test < 512 ? buffer_size_test : 512)-2;
    const unsigned char *haystack=(const unsigned char *)buffer+2;
    const unsigned char *res;
    res=(const unsigned char *)memchr(haystack,'\n',ll);
    if(res!=NULL)
      ll=res-haystack;
    if(td_memmem(haystack, ll, "perl", 4) != NULL)
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->data_check=&data_check_txt;
      file_recovery_new->file_check=&file_check_size;
      /* Perl script */
      file_recovery_new->extension="pl";
      return 1;
    }
    if(td_memmem(haystack, ll, "python", 6) != NULL)
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->data_check=&data_check_txt;
      file_recovery_new->file_check=&file_check_size;
      /* Python script */
      file_recovery_new->extension="py";
      return 1;
    }
    if(td_memmem(haystack, ll, "ruby", 4) != NULL)
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->data_check=&data_check_txt;
      file_recovery_new->file_check=&file_check_size;
      /* Ruby script */
      file_recovery_new->extension="rb";
      return 1;
    }
  }
  if(safe_header_only!=0)
  {
    return 0;
  }
  if(file_recovery->file_stat!=NULL)
  {
    if(file_recovery->file_stat->file_hint == &file_hint_doc)
    {
      return 0;
    }
    else if(file_recovery->file_stat->file_hint == &file_hint_fasttxt ||
        file_recovery->file_stat->file_hint == &file_hint_txt)
    {
      /* A text file may only be interrupted when it is a .html one */
      if(strstr(file_recovery->filename,".html")==NULL)
        return 0;
    }
    else if(file_recovery->file_stat->file_hint == &file_hint_jpg)
    {
      /* Don't search text at the beginning of JPG */
      if(file_recovery->file_size < file_recovery->min_filesize)
        return 0;
      /* Text should not be found in JPEG */
      if(td_memmem(buffer, buffer_size_test, "8BIM", 4)!=NULL ||
          td_memmem(buffer, buffer_size_test, "adobe", 5)!=NULL ||
          td_memmem(buffer, buffer_size_test, "exif:", 5)!=NULL ||
          td_memmem(buffer, buffer_size_test, "<rdf:", 5)!=NULL ||
          td_memmem(buffer, buffer_size_test, "<?xpacket", 9)!=NULL ||
          td_memmem(buffer, buffer_size_test, "<dict>", 6)!=NULL ||
          td_memmem(buffer, buffer_size_test, "xmp:CreatorTool>", 16)!=NULL ||
          td_memmem(buffer, buffer_size_test, "[camera info]", 13)!=NULL)
        return 0;
    }
    else
      return 0;
  }
  if(buffer_lower_size<buffer_size_test+16)
  {
    free(buffer_lower);
    buffer_lower=NULL;
  }
  /* Don't malloc/free memory every time, small memory leak */
  if(buffer_lower==NULL)
  {
    buffer_lower_size=buffer_size_test+16;
    buffer_lower=(char *)MALLOC(buffer_lower_size);
  }
  l=UTF2Lat((unsigned char*)buffer_lower, buffer, buffer_size_test);
  if(l<10)
    return 0;
  {
    unsigned int line_nbr=0;
    unsigned int i;
    for(i=0; i<512 && i<l; i++)
    {
      if(buffer[i]=='\n')
        line_nbr++;
    }
    /* A text file must contain several lines */
    if(line_nbr==0)
      return 0;
  }
  if(strncasecmp((const char *)buffer, "rem ", 4)==0)
  {
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Dos/Windows batch */
    file_recovery_new->extension="bat";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "dn: ", 4)==0)
  {
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    file_recovery_new->extension="ldif";
    return 1;
  }
  {
    const char *ext=NULL;
    /* ind=~0: random
     * ind=~1: constant */
    double ind=1;
    unsigned int nbrf=0;
    unsigned int is_csv=1;
    char *str;
    /* Detect Fortran: count the lines that begin with a space.
     * NOTE(review): fixed-form Fortran statements start in column 7; confirm
     * that a single space after the newline is the intended pattern. */
    {
      str=buffer_lower;
      while((str=strstr(str, "\n "))!=NULL)
      {
        nbrf++;
        str++;
      }
    }
    /* Detect csv: every line must hold the same number of ';' */
    {
      unsigned int csv_per_line_current=0;
      unsigned int csv_per_line=0;
      unsigned int line_nbr=0;
      unsigned int i;
      for(i=0;i<l && is_csv>0;i++)
      {
        if(buffer_lower[i]==';')
        {
          csv_per_line_current++;
        }
        else if(buffer_lower[i]=='\n')
        {
          if(line_nbr==0)
            csv_per_line=csv_per_line_current;
          if(csv_per_line_current!=csv_per_line)
            is_csv=0;
          line_nbr++;
          csv_per_line_current=0;
        }
      }
      if(csv_per_line<1 || line_nbr<10)
        is_csv=0;
    }
    /* Byte frequency statistic; l>=10 here so the division is safe */
    {
      unsigned int stats[256];
      unsigned int i;
      memset(&stats, 0, sizeof(stats));
      for(i=0;i<l;i++)
        stats[(unsigned char)buffer_lower[i]]++;
      ind=0;
      for(i=0;i<256;i++)
        if(stats[i]>0)
          ind+=stats[i]*(stats[i]-1);
      ind=ind/l/(l-1);
    }
    /* Windows Autorun */
    if(strstr(buffer_lower, "[autorun]")!=NULL)
      ext="inf";
    /* Detect .ini */
    else if(buffer[0]=='[' && l>50 && is_ini(buffer_lower))
      ext="ini";
    /* php (Hypertext Preprocessor) script */
    else if(strstr(buffer_lower, "<?php")!=NULL)
      ext="php";
    /* Comma separated values */
    else if(is_csv>0)
      ext="csv";
    /* Detect LaTeX, C, PHP, JSP, ASP, HTML, C header */
    else if(strstr(buffer_lower, "\\begin{")!=NULL)
      ext="tex";
    else if(strstr(buffer_lower, "#include")!=NULL)
      ext="c";
    else if(l>20 && strstr(buffer_lower, "<%@")!=NULL)
      ext="jsp";
    else if(l>20 && strstr(buffer_lower, "<%=")!=NULL)
      ext="jsp";
    else if(l>20 && strstr(buffer_lower, "<% ")!=NULL)
      ext="asp";
    else if(strstr(buffer_lower, "<html")!=NULL)
      ext="html";
    else if(strstr(buffer_lower, "private static")!=NULL ||
        strstr(buffer_lower, "public interface")!=NULL)
    {
      ext=ext_java;
    }
    else if((str=strstr(buffer_lower, "\nimport "))!=NULL)
    {
      /* A Java import ends with ';', a Python one does not */
      str+=8;
      while(*str!='\0' && *str!='\n' && *str!=';')
        str++;
      if(*str==';')
        ext=ext_java;
      else
        ext="py";
    }
    else if(strstr(buffer_lower, "class ")!=NULL &&
        (l>=100 || file_recovery->file_stat==NULL))
    {
      ext=ext_java;
    }
    /* Fortran */
    else if(nbrf>10 && ind<0.9 && strstr(buffer_lower, "integer")!=NULL)
      ext="f";
    /* LilyPond http://lilypond.org*/
    else if(strstr(buffer_lower, "\\score {")!=NULL)
      ext="ly";
    /* C header file */
    else if(strstr(buffer_lower, "/*")!=NULL && l>50)
      ext="h";
    else if(l<100 || ind<0.03 || ind>0.90)
      ext=NULL;
    /* JavaScript Object Notation */
    else if(memcmp(buffer_lower, "{\"", 2)==0)
      ext="json";
    else
      ext=file_hint_txt.extension;
    if(ext==NULL)
      return 0;
    if(strcmp(ext,"txt")==0 &&
        (strstr(buffer_lower,"<br>")!=NULL || strstr(buffer_lower,"<p>")!=NULL))
    {
      ext="html";
    }
    /* A file_hint_doc file never reaches this point: that case has already
     * been rejected above, right after the safe_header_only test. */
    if(file_recovery->file_stat!=NULL &&
        (file_recovery->file_stat->file_hint == &file_hint_fasttxt ||
         file_recovery->file_stat->file_hint == &file_hint_txt))
    {
      /* file_recovery->filename is a .html */
      /* Limit the search to the start of the text, without writing outside
       * of the work buffer */
      if(buffer_lower_size>511)
        buffer_lower[511]='\0';
      if(strstr(buffer_lower, "<html")==NULL)
        return 0;
      /* Special case: two consecutive HTML files */
    }
    reset_file_recovery(file_recovery_new);
    if(strcmp(ext, "html")==0)
    {
      file_recovery_new->file_rename=&file_rename_html;
      file_recovery_new->data_check=&data_check_html;
    }
    else
      file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    file_recovery_new->extension=ext;
    return 1;
  }
}