示例#1
0
/*
 * Data-check callback: converts the second half of buffer with UTF2Lat()
 * and uses the number of bytes it reports to decide whether the file goes on.
 *
 * buffer        - data window; only the bytes from buffer_size/2 onward are read
 * buffer_size   - size of the window in bytes
 * file_recovery - recovery state; calculated_file_size is updated from file_size
 *
 * Returns DC_CONTINUE when the whole inspected span was converted,
 * DC_STOP otherwise.
 */
static data_check_t data_check_win(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery)
{
  const unsigned int half=buffer_size/2;
  /* Nothing accounted for yet: skip the first 3 bytes of the span
   * (presumably a 3-byte byte-order mark - TODO confirm) */
  const unsigned int skip=(file_recovery->calculated_file_size==0 ? 3 : 0);
  const unsigned int wanted=half-skip;
  char *scratch=(char *)MALLOC(buffer_size+16);
  const unsigned int converted=UTF2Lat((unsigned char*)scratch, &buffer[half+skip], wanted);
  /* Only the converted length matters, the converted text itself is not used */
  free(scratch);
  if(converted>=wanted)
  {
    /* The whole span was converted: account for it and keep going */
    file_recovery->calculated_file_size=file_recovery->file_size+half;
    return DC_CONTINUE;
  }
  /* Conversion stopped early: keep the converted part only when it is
   * at least 10 bytes long, then stop */
  if(converted>=10)
    file_recovery->calculated_file_size=file_recovery->file_size+skip+converted;
  return DC_STOP;
}
示例#2
0
/*
 * Header check for text-like files.
 *
 * buffer            - start of the data to examine
 * buffer_size       - number of bytes available in buffer; at most the first
 *                     2048 bytes are converted and analysed
 * safe_header_only  - when non-zero, only the explicit signatures are tried
 *                     (emlx, "@echo off", vbscript ASP, Visual Basic, vCard,
 *                     "#!" scripts) and the heuristic detection is skipped
 * file_recovery     - file currently being recovered; its file_stat hint,
 *                     extension, filename, file_size and min_filesize are
 *                     consulted to avoid splitting that file by mistake
 * file_recovery_new - filled in (after reset_file_recovery()) when a text
 *                     file is recognised
 *
 * Returns 1 when a file has been recognised and file_recovery_new is set,
 * 0 otherwise.
 *
 * The function keeps a static scratch buffer between calls.
 */
static int header_check_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  static char *buffer_lower=NULL;
  static unsigned int buffer_lower_size=0;
  unsigned int l;
  const unsigned int buffer_size_test=(buffer_size < 2048 ? buffer_size : 2048);
  {
    /* Mac OSX mail: a decimal length (up to 10 digits) on the first line,
     * followed by the mail headers */
    unsigned int i;
    unsigned int tmp=0;
    for(i=0;i<10 && isdigit(buffer[i]);i++)
      tmp=tmp*10+buffer[i]-'0';
    if(buffer[i]==0x0a &&
      (memcmp(buffer+i+1, "Return-Path: ", 13)==0 ||
       memcmp(buffer+i+1, "Received: from", 14)==0) &&
        !(file_recovery->file_stat!=NULL &&
          file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
          strcmp(file_recovery->extension,"mbox")==0))
    {
      reset_file_recovery(file_recovery_new);
      /* The announced length plus the length line itself */
      file_recovery_new->calculated_file_size=tmp+i+1;
      file_recovery_new->data_check=NULL;
      file_recovery_new->file_check=&file_check_emlx;
      /* Mac OSX mail */
      file_recovery_new->extension="emlx";
      return 1;
    }
  }
  if(strncasecmp((const char *)buffer, "@echo off", 9)==0)
  {
    if(buffer[9]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* DOS/Windows batch */
    file_recovery_new->extension="bat";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "<%@ language=\"vbscript", 22)==0)
  {
    if(buffer[22]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Microsoft Active Server Pages */
    file_recovery_new->extension="asp";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "version 4.00\r\nbegin", 19)==0)
  {
    if(buffer[19]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Microsoft Visual Basic */
    file_recovery_new->extension="vb";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "begin:vcard", 11)==0)
  {
    if(buffer[11]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* vcard, electronic business cards */
    file_recovery_new->extension="vcf";
    return 1;
  }
  if(buffer[0]=='#' && buffer[1]=='!')
  {
    /* Look for the interpreter name on the first line only, within the
     * first 512 bytes and never past the end of the buffer */
    unsigned int ll=(buffer_size_test < 512 ? buffer_size_test : 512);
    const unsigned char *haystack=(const unsigned char *)buffer+2;
    const unsigned char *res;
    ll=(ll > 2 ? ll-2 : 0);
    res=(const unsigned char *)memchr(haystack,'\n',ll);
    if(res!=NULL)
      ll=res-haystack;
    if(td_memmem(haystack, ll, "perl", 4) != NULL)
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->data_check=&data_check_txt;
      file_recovery_new->file_check=&file_check_size;
      /* Perl script */
      file_recovery_new->extension="pl";
      return 1;
    }
    if(td_memmem(haystack, ll, "python", 6) != NULL)
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->data_check=&data_check_txt;
      file_recovery_new->file_check=&file_check_size;
      /* Python script */
      file_recovery_new->extension="py";
      return 1;
    }
    if(td_memmem(haystack, ll, "ruby", 4) != NULL)
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->data_check=&data_check_txt;
      file_recovery_new->file_check=&file_check_size;
      /* Ruby script */
      file_recovery_new->extension="rb";
      return 1;
    }
  }
  /* Everything below is heuristic */
  if(safe_header_only!=0)
  {
    return 0;
  }
  if(file_recovery->file_stat!=NULL)
  {
    if(file_recovery->file_stat->file_hint == &file_hint_doc)
    {
      /* Never start a text file inside a file recovered as doc */
      return 0;
    }
    else if(file_recovery->file_stat->file_hint == &file_hint_fasttxt ||
        file_recovery->file_stat->file_hint == &file_hint_txt)
    {
      /* Inside a text file, only a .html file may be followed by a new one */
      if(strstr(file_recovery->filename,".html")==NULL)
        return 0;
    }
    else if(file_recovery->file_stat->file_hint == &file_hint_jpg)
    {
      /* Don't search text at the beginning of JPG */
      if(file_recovery->file_size < file_recovery->min_filesize)
        return 0;
      /* Text should not be found in JPEG */
      if(td_memmem(buffer, buffer_size_test, "8BIM", 4)!=NULL ||
          td_memmem(buffer, buffer_size_test, "adobe", 5)!=NULL ||
          td_memmem(buffer, buffer_size_test, "exif:", 5)!=NULL ||
          td_memmem(buffer, buffer_size_test, "<rdf:", 5)!=NULL ||
          td_memmem(buffer, buffer_size_test, "<?xpacket", 9)!=NULL ||
          td_memmem(buffer, buffer_size_test, "<dict>", 6)!=NULL ||
          td_memmem(buffer, buffer_size_test, "xmp:CreatorTool>", 16)!=NULL ||
          td_memmem(buffer, buffer_size_test, "[camera info]", 13)!=NULL)
        return 0;
    }
    else
      return 0;
  }
  if(buffer_lower_size<buffer_size_test+16)
  {
    free(buffer_lower);
    buffer_lower=NULL;
  }
  /* Don't malloc/free memory every time, small memory leak */
  if(buffer_lower==NULL)
  {
    buffer_lower_size=buffer_size_test+16;
    buffer_lower=(char *)MALLOC(buffer_lower_size);
  }
  l=UTF2Lat((unsigned char*)buffer_lower, buffer, buffer_size_test);
  if(l<10)
    return 0;
  {
    unsigned int line_nbr=0;
    unsigned int i;
    for(i=0; i<512 && i<l; i++)
    {
      if(buffer[i]=='\n')
        line_nbr++;
    }
    /* A text file must contain at least one line break in its first 512 bytes */
    if(line_nbr==0)
      return 0;
  }
  if(strncasecmp((const char *)buffer, "rem ", 4)==0)
  {
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* DOS/Windows batch */
    file_recovery_new->extension="bat";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "dn: ", 4)==0)
  {
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    file_recovery_new->extension="ldif";
    return 1;
  }
  {
    const char *ext=NULL;
    /* ind=~0: random
     * ind=~1: constant */
    double ind=1;
    unsigned int nbrf=0;
    unsigned int is_csv=1;
    char *str;
    /* Detect Fortran: count the lines starting with six spaces */
    {
      str=buffer_lower;
      while((str=strstr(str, "\n      "))!=NULL)
      {
        nbrf++;
        str++;
      }
    }
    /* Detect csv: every line must hold the same number of ';' as the first
     * one, with at least one ';' per line and at least 10 lines */
    {
      unsigned int csv_per_line_current=0;
      unsigned int csv_per_line=0;
      unsigned int line_nbr=0;
      unsigned int i;
      for(i=0;i<l && is_csv>0;i++)
      {
        if(buffer_lower[i]==';')
        {
          csv_per_line_current++;
        }
        else if(buffer_lower[i]=='\n')
        {
          if(line_nbr==0)
            csv_per_line=csv_per_line_current;
          if(csv_per_line_current!=csv_per_line)
            is_csv=0;
          line_nbr++;
          csv_per_line_current=0;
        }
      }
      if(csv_per_line<1 || line_nbr<10)
        is_csv=0;
    }
    /* Byte statistics: ind is the probability that two distinct positions
     * of the converted text hold the same byte (l>=10 here, so l-1 is not 0) */
    {
      unsigned int stats[256];
      unsigned int i;
      memset(&stats, 0, sizeof(stats));
      for(i=0;i<l;i++)
        stats[(unsigned char)buffer_lower[i]]++;
      ind=0;
      for(i=0;i<256;i++)
        if(stats[i]>0)
          ind+=stats[i]*(stats[i]-1);
      ind=ind/l/(l-1);
    }
    /* Windows Autorun */
    if(strstr(buffer_lower, "[autorun]")!=NULL)
      ext="inf";
    /* Detect .ini */
    else if(buffer[0]=='[' && l>50 && is_ini(buffer_lower))
      ext="ini";
    /* php (Hypertext Preprocessor) script */
    else if(strstr(buffer_lower, "<?php")!=NULL)
      ext="php";
    /* Comma separated values */
    else if(is_csv>0)
      ext="csv";
    /* Detect LaTeX, C, PHP, JSP, ASP, HTML, C header */
    else if(strstr(buffer_lower, "\\begin{")!=NULL)
      ext="tex";
    else if(strstr(buffer_lower, "#include")!=NULL)
      ext="c";
    else if(l>20 && strstr(buffer_lower, "<%@")!=NULL)
      ext="jsp";
    else if(l>20 && strstr(buffer_lower, "<%=")!=NULL)
      ext="jsp";
    else if(l>20 && strstr(buffer_lower, "<% ")!=NULL)
      ext="asp";
    else if(strstr(buffer_lower, "<html")!=NULL)
      ext="html";
    else if(strstr(buffer_lower, "private static")!=NULL ||
        strstr(buffer_lower, "public interface")!=NULL)
    {
#ifdef DJGPP
      ext="jav";
#else
      ext="java";
#endif
    }
    else if((str=strstr(buffer_lower, "\nimport "))!=NULL)
    {
      /* An import line ending with ';' is taken as Java, otherwise Python */
      str+=8;
      while(*str!='\0' && *str!='\n' && *str!=';')
        str++;
      if(*str==';')
        ext="java";
      else
        ext="py";
    }
    else if(strstr(buffer_lower, "class ")!=NULL &&
        (l>=100 || file_recovery->file_stat==NULL))
    {
#ifdef DJGPP
      ext="jav";
#else
      ext="java";
#endif
    }
    /* Fortran */
    else if(nbrf>10 && ind<0.9 && strstr(buffer_lower, "integer")!=NULL)
      ext="f";
    /* LilyPond http://lilypond.org*/
    else if(strstr(buffer_lower, "\\score {")!=NULL)
      ext="ly";
    /* C header file */
    else if(strstr(buffer_lower, "/*")!=NULL && l>50)
      ext="h";
    /* Too short, too random or too constant to be plain text */
    else if(l<100 || ind<0.03 || ind>0.90)
      ext=NULL;
    /* JavaScript Object Notation  */
    else if(memcmp(buffer_lower, "{\"", 2)==0)
      ext="json";
    else
      ext=file_hint_txt.extension;
    if(ext==NULL)
      return 0;
    if(strcmp(ext,"txt")==0 &&
        (strstr(buffer_lower,"<br>")!=NULL || strstr(buffer_lower,"<p>")!=NULL))
    {
      ext="html";
    }
    /* A file recovered as doc never reaches this point (rejected above),
     * so only the text-inside-text case needs a final check */
    if(file_recovery->file_stat!=NULL &&
        (file_recovery->file_stat->file_hint == &file_hint_fasttxt ||
         file_recovery->file_stat->file_hint == &file_hint_txt))
    {
      /* file_recovery->filename is a .html */
      /* Limit the search to the first 511 characters; the scratch buffer
       * may be shorter than 512 bytes when buffer_size is small */
      if(buffer_lower_size>511)
        buffer_lower[511]='\0';
      if(strstr(buffer_lower, "<html")==NULL)
        return 0;
      /* Special case: two consecutive HTML files */
    }
    reset_file_recovery(file_recovery_new);
    if(strcmp(ext, "html")==0)
    {
      file_recovery_new->file_rename=&file_rename_html;
      file_recovery_new->data_check=&data_check_html;
    }
    else
      file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    file_recovery_new->extension=ext;
    return 1;
  }
}