/*
 * Data check callback: examines the second half of buffer and decides
 * whether the text of the file being recovered continues.
 *
 * buffer         data to inspect; only bytes from buffer_size/2 onward are read
 * buffer_size    size of buffer in bytes
 * file_recovery  recovery state; calculated_file_size is updated here
 *
 * Returns DC_CONTINUE when UTF2Lat reports at least as many bytes as it was
 * given, DC_STOP otherwise.
 */
static data_check_t data_check_win(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery)
{
  const unsigned int half=buffer_size/2;
  /* Scratch output for UTF2Lat; its content is never read in this function */
  char *converted=(char *)MALLOC(buffer_size+16);
  /* While no size has been computed yet, the first 3 bytes are skipped
   * (presumably a UTF-8 byte order mark - TODO confirm) */
  const unsigned int skip=(file_recovery->calculated_file_size==0 ? 3 : 0);
  const unsigned int wanted=half-skip;
  /* NOTE(review): UTF2Lat is assumed to return the number of bytes it
   * accepted as text - confirm against its definition */
  const unsigned int accepted=UTF2Lat((unsigned char *)converted, &buffer[half+skip], wanted);
  free(converted);
  if(accepted>=wanted)
  {
    /* Everything was accepted: the file extends at least to the end of this half */
    file_recovery->calculated_file_size=file_recovery->file_size+half;
    return DC_CONTINUE;
  }
  /* The conversion stopped early; the size is only updated when at least
   * 10 bytes were accepted */
  if(accepted>=10)
    file_recovery->calculated_file_size=file_recovery->file_size+skip+accepted;
  return DC_STOP;
}
/*
 * Record the detection of a plain text-like file in file_recovery_new:
 * the state is reset, then data_check_txt/file_check_size and the given
 * extension are installed.
 * Always returns 1 so that callers can return its result directly.
 */
static int header_txt_found(file_recovery_t *file_recovery_new, const char *extension)
{
  reset_file_recovery(file_recovery_new);
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  file_recovery_new->extension=extension;
  return 1;
}

/*
 * Header check for text-based files.
 *
 * buffer            start of the data to identify
 * buffer_size       number of valid bytes in buffer; at most the first 2048
 *                   are analysed by the heuristic part
 * safe_header_only  when non-zero, only the fixed signatures (emlx, bat, asp,
 *                   vb, vcf, "#!" scripts) are tested
 * file_recovery     state of the file currently being recovered, used to
 *                   avoid false positives inside doc/txt/jpg files
 * file_recovery_new filled in when a text file is recognised
 *
 * Returns 1 when a file has been recognised, 0 otherwise.
 *
 * The conversion buffer is kept in static storage between calls, so this
 * function is not reentrant.
 */
static int header_check_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  static char *buffer_lower=NULL;
  static unsigned int buffer_lower_size=0;
  unsigned int l;
  const unsigned int buffer_size_test=(buffer_size < 2048 ? buffer_size : 2048);
  {
    /* emlx: a decimal number (up to 10 digits) alone on the first line,
     * followed by a mail header; the number is added to the file size */
    unsigned int i;
    unsigned int tmp=0;
    for(i=0;i<10 && isdigit(buffer[i]);i++)
      tmp=tmp*10+buffer[i]-'0';
    if(buffer[i]==0x0a &&
        (memcmp(buffer+i+1, "Return-Path: ", 13)==0 ||
         memcmp(buffer+i+1, "Received: from", 14)==0) &&
        !(file_recovery->file_stat!=NULL &&
          file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
          strcmp(file_recovery->extension,"mbox")==0))
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->calculated_file_size=tmp+i+1;
      file_recovery_new->data_check=NULL;
      file_recovery_new->file_check=&file_check_emlx;
      /* Mac OSX mail */
      file_recovery_new->extension="emlx";
      return 1;
    }
  }
  if(strncasecmp((const char *)buffer, "@echo off", 9)==0)
  {
    if(buffer[9]=='\0')
      return 0;
    /* Dos/Windows batch */
    return header_txt_found(file_recovery_new, "bat");
  }
  if(strncasecmp((const char *)buffer, "<%@ language=\"vbscript", 22)==0)
  {
    if(buffer[22]=='\0')
      return 0;
    /* Microsoft Active Server Pages */
    return header_txt_found(file_recovery_new, "asp");
  }
  if(strncasecmp((const char *)buffer, "version 4.00\r\nbegin", 19)==0)
  {
    if(buffer[19]=='\0')
      return 0;
    /* Microsoft Visual Basic */
    return header_txt_found(file_recovery_new, "vb");
  }
  if(strncasecmp((const char *)buffer, "begin:vcard", 11)==0)
  {
    if(buffer[11]=='\0')
      return 0;
    /* vcard, electronic business cards */
    return header_txt_found(file_recovery_new, "vcf");
  }
  if(buffer[0]=='#' && buffer[1]=='!')
  {
    /* Search the interpreter name on the first line only, looking at no
     * more than the first 512 bytes and never beyond buffer_size */
    const unsigned int scan_end=(buffer_size < 512 ? buffer_size : 512);
    unsigned int ll=(scan_end > 2 ? scan_end-2 : 0);
    const unsigned char *haystack=(const unsigned char *)buffer+2;
    const unsigned char *res;
    res=(const unsigned char *)memchr(haystack,'\n',ll);
    if(res!=NULL)
      ll=(unsigned int)(res-haystack);
    if(td_memmem(haystack, ll, "perl", 4) != NULL)
    {
      /* Perl script */
      return header_txt_found(file_recovery_new, "pl");
    }
    if(td_memmem(haystack, ll, "python", 6) != NULL)
    {
      /* Python script */
      return header_txt_found(file_recovery_new, "py");
    }
    if(td_memmem(haystack, ll, "ruby", 4) != NULL)
    {
      /* Ruby script */
      return header_txt_found(file_recovery_new, "rb");
    }
  }
  if(safe_header_only!=0)
  {
    return 0;
  }
  if(file_recovery->file_stat!=NULL)
  {
    if(file_recovery->file_stat->file_hint == &file_hint_doc)
    {
      /* Text is only searched inside a file recovered as .doc; the extra
       * doc-specific validation is done once the text has been analysed.
       * An unconditional "return 0" here would make that validation
       * unreachable. */
      if(strstr(file_recovery->filename,".doc")==NULL)
        return 0;
    }
    else if(file_recovery->file_stat->file_hint == &file_hint_fasttxt ||
        file_recovery->file_stat->file_hint == &file_hint_txt)
    {
      if(strstr(file_recovery->filename,".html")==NULL)
        return 0;
    }
    else if(file_recovery->file_stat->file_hint == &file_hint_jpg)
    {
      /* Don't search text at the beginning of JPG */
      if(file_recovery->file_size < file_recovery->min_filesize)
        return 0;
      /* Text should not be found in JPEG */
      if(td_memmem(buffer, buffer_size_test, "8BIM", 4)!=NULL ||
          td_memmem(buffer, buffer_size_test, "adobe", 5)!=NULL ||
          td_memmem(buffer, buffer_size_test, "exif:", 5)!=NULL ||
          td_memmem(buffer, buffer_size_test, "<rdf:", 5)!=NULL ||
          td_memmem(buffer, buffer_size_test, "<?xpacket", 9)!=NULL ||
          td_memmem(buffer, buffer_size_test, "<dict>", 6)!=NULL ||
          td_memmem(buffer, buffer_size_test, "xmp:CreatorTool>", 16)!=NULL ||
          td_memmem(buffer, buffer_size_test, "[camera info]", 13)!=NULL)
        return 0;
    }
    else
      return 0;
  }
  if(buffer_lower_size<buffer_size_test+16)
  {
    free(buffer_lower);
    buffer_lower=NULL;
  }
  /* Don't malloc/free memory every time, small memory leak */
  if(buffer_lower==NULL)
  {
    buffer_lower_size=buffer_size_test+16;
    buffer_lower=(char *)MALLOC(buffer_lower_size);
  }
  /* NOTE(review): UTF2Lat is assumed to store a NUL-terminated, lower-case
   * copy of the text in buffer_lower and to return its length - confirm
   * against its definition. The str*() calls below rely on the terminator. */
  l=UTF2Lat((unsigned char*)buffer_lower, buffer, buffer_size_test);
  if(l<10)
    return 0;
  {
    unsigned int line_nbr=0;
    unsigned int i;
    for(i=0; i<512 && i<l; i++)
    {
      if(buffer[i]=='\n')
        line_nbr++;
    }
    /* A text file must contain several lines */
    if(line_nbr==0)
      return 0;
  }
  if(strncasecmp((const char *)buffer, "rem ", 4)==0)
  {
    /* Dos/Windows batch */
    return header_txt_found(file_recovery_new, "bat");
  }
  if(strncasecmp((const char *)buffer, "dn: ", 4)==0)
  {
    return header_txt_found(file_recovery_new, "ldif");
  }
  {
    const char *ext=NULL;
    /* ind=~0: random
     * ind=~1: constant */
    double ind=1;
    unsigned int nbrf=0;
    unsigned int is_csv=1;
    char *str;
    /* Detect Fortran: count the lines starting with a space */
    {
      str=buffer_lower;
      while((str=strstr(str, "\n "))!=NULL)
      {
        nbrf++;
        str++;
      }
    }
    /* Detect csv: every line must hold the same non-zero number of ';'
     * and there must be at least 10 lines */
    {
      unsigned int csv_per_line_current=0;
      unsigned int csv_per_line=0;
      unsigned int line_nbr=0;
      unsigned int i;
      for(i=0;i<l && is_csv>0;i++)
      {
        if(buffer_lower[i]==';')
        {
          csv_per_line_current++;
        }
        else if(buffer_lower[i]=='\n')
        {
          if(line_nbr==0)
            csv_per_line=csv_per_line_current;
          if(csv_per_line_current!=csv_per_line)
            is_csv=0;
          line_nbr++;
          csv_per_line_current=0;
        }
      }
      if(csv_per_line<1 || line_nbr<10)
        is_csv=0;
    }
    /* Character statistics: ind = sum(n*(n-1)) / (l*(l-1)) where n is the
     * number of occurrences of each byte value; l>=10 here, so there is no
     * division by zero */
    {
      unsigned int stats[256];
      unsigned int i;
      memset(&stats, 0, sizeof(stats));
      for(i=0;i<l;i++)
        stats[(unsigned char)buffer_lower[i]]++;
      ind=0;
      for(i=0;i<256;i++)
        if(stats[i]>0)
          ind+=stats[i]*(stats[i]-1);
      ind=ind/l/(l-1);
    }
    /* The order of the tests below matters: the first match wins */
    /* Windows Autorun */
    if(strstr(buffer_lower, "[autorun]")!=NULL)
      ext="inf";
    /* Detect .ini */
    else if(buffer[0]=='[' && l>50 && is_ini(buffer_lower))
      ext="ini";
    /* php (Hypertext Preprocessor) script */
    else if(strstr(buffer_lower, "<?php")!=NULL)
      ext="php";
    /* Comma separated values */
    else if(is_csv>0)
      ext="csv";
    /* Detect LaTeX, C, PHP, JSP, ASP, HTML, C header */
    else if(strstr(buffer_lower, "\\begin{")!=NULL)
      ext="tex";
    else if(strstr(buffer_lower, "#include")!=NULL)
      ext="c";
    else if(l>20 && strstr(buffer_lower, "<%@")!=NULL)
      ext="jsp";
    else if(l>20 && strstr(buffer_lower, "<%=")!=NULL)
      ext="jsp";
    else if(l>20 && strstr(buffer_lower, "<% ")!=NULL)
      ext="asp";
    else if(strstr(buffer_lower, "<html")!=NULL)
      ext="html";
    else if(strstr(buffer_lower, "private static")!=NULL ||
        strstr(buffer_lower, "public interface")!=NULL)
    {
#ifdef DJGPP
      ext="jav";
#else
      ext="java";
#endif
    }
    else if((str=strstr(buffer_lower, "\nimport "))!=NULL)
    {
      /* An import statement ended by ';' on the same line is taken as
       * Java, otherwise as Python */
      str+=8;
      while(*str!='\0' && *str!='\n' && *str!=';')
        str++;
      if(*str==';')
        ext="java";
      else
        ext="py";
    }
    else if(strstr(buffer_lower, "class ")!=NULL && (l>=100 || file_recovery->file_stat==NULL))
    {
#ifdef DJGPP
      ext="jav";
#else
      ext="java";
#endif
    }
    /* Fortran */
    else if(nbrf>10 && ind<0.9 && strstr(buffer_lower, "integer")!=NULL)
      ext="f";
    /* LilyPond http://lilypond.org*/
    else if(strstr(buffer_lower, "\\score {")!=NULL)
      ext="ly";
    /* C header file */
    else if(strstr(buffer_lower, "/*")!=NULL && l>50)
      ext="h";
    /* Too short, too random or too uniform to be text */
    else if(l<100 || ind<0.03 || ind>0.90)
      ext=NULL;
    /* JavaScript Object Notation */
    else if(memcmp(buffer_lower, "{\"", 2)==0)
      ext="json";
    else
      ext=file_hint_txt.extension;
    if(ext==NULL)
      return 0;
    if(strcmp(ext,"txt")==0 &&
        (strstr(buffer_lower,"<br>")!=NULL || strstr(buffer_lower,"<p>")!=NULL))
    {
      ext="html";
    }
    if(file_recovery->file_stat!=NULL)
    {
      if(file_recovery->file_stat->file_hint == &file_hint_doc)
      {
        unsigned int i;
        unsigned int txt_nl=0;
        /* file_recovery->filename is .doc */
        if(ind>0.20)
          return 0;
        /* Unix: \n (0xA)
         * Dos: \r\n (0xD 0xA)
         * Doc: \r (0xD) */
        for(i=0; i<l-1; i++)
        {
          if(buffer_lower[i]=='\r' && buffer_lower[i+1]!='\n')
            return 0;
        }
        for(i=0; i<l && i<512; i++)
          if(buffer_lower[i]=='\n')
            txt_nl++;
        if(txt_nl<=1)
          return 0;
      }
      else if(file_recovery->file_stat->file_hint == &file_hint_fasttxt ||
          file_recovery->file_stat->file_hint == &file_hint_txt)
      {
        /* file_recovery->filename is a .html */
        /* Limit the search to the first 511 characters; the write is
         * skipped when the allocation is too small to hold index 511 */
        if(buffer_lower_size>511)
          buffer_lower[511]='\0';
        if(strstr(buffer_lower, "<html")==NULL)
          return 0;
        /* Special case: two consecutive HTML files */
      }
    }
    reset_file_recovery(file_recovery_new);
    if(strcmp(ext, "html")==0)
    {
      file_recovery_new->file_rename=&file_rename_html;
      file_recovery_new->data_check=&data_check_html;
    }
    else
      file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    file_recovery_new->extension=ext;
    return 1;
  }
}