示例#1
0
// Convert an isolated file/directory name to an 8.3 name (mode 1, and any
// unknown mode) or to an ISO9660-style name with a 31-character base (mode 2).
//   mode : 1 -> base name limited to 8 characters, 2 -> limited to 31
//   n83  : output buffer receiving "BASE" or "BASE.EXT" (EXT is at most 3 chars)
//   save : input name; it is sanitized IN PLACE (upper-cased, forbidden
//          characters replaced by '_', only the last dot kept)
void longfile_to_83(int mode,char* n83,char* save) {
  const int limit = (mode == 2) ? 31 : 8;
  char nom[256];
  char ext[256];
  char *last_dot;
  char *p;
  int copied;
  int pos;

  nom[0] = '\0';
  ext[0] = '\0';

  /* No starting . */
  if (save[0] == '.')
    save[0] = '_';

  /* No multiple dots: every dot but the last one becomes '_' */
  last_dot = strrchr(save, '.');
  for (p = save; *p != '\0'; p++) {
    if (*p == '.' && p != last_dot)
      *p = '_';
  }

  /*
  Avoid: (ISO9660, but also suitable for 8-3)
  /:;?\#*~
  0x00-0x1f and 0x80-0xff
  Only A-Z, 0-9, '_' and '.' survive; lower case is folded to upper case.
  */
  for (p = save; *p != '\0'; p++) {
    const char c = *p;

    if (c >= 'a' && c <= 'z')
      *p = (char) (c - ('a' - 'A'));
    else if (!((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
               || c == '_' || c == '.'))
      *p = '_';
  }

  // copy the base name (up to the limit, the dot, or the end), skipping spaces
  copied = 0;
  for (pos = 0; copied < limit && save[pos] != '\0' && save[pos] != '.'; pos++) {
    if (save[pos] != ' ')
      nom[copied++] = save[pos];
  }
  nom[copied] = '\0';

  // something is left after the base name: look for an extension
  if (save[pos] != '\0') {
    int k = (int) strlen(save) - 1;

    // scan backwards for the last '.' (stop at a '/' or at the first char)
    while (k > 0 && save[k] != '.' && save[k] != '/')
      k--;
    if (save[k] == '.') {
      int e = 0;

      // keep at most three non-space characters after the dot
      for (k++; e < 3 && save[k] != '\0'; k++) {
        if (save[k] != ' ')
          ext[e++] = save[k];
      }
      ext[e] = '\0';
    }
  }

  // assemble BASE[.EXT]
  n83[0] = '\0';
  strncatbuff(n83, nom, limit);
  if (strnotempty(ext)) {
    strcatbuff(n83, ".");
    strncatbuff(n83, ext, 3);
  }
}
示例#2
0
// Wildcard ("joker") super-comparator.
// Matches chaine against joker (the pattern, which may contain wildcards);
// literal characters are compared with streql() [the original notes mark this
// as the case-insensitive comparison point].
// Returns a non-NULL pointer into chaine on a match, NULL otherwise
// (e.g. for *[..]toto.. the address of "toto" inside the string).
// Accepts things like www.*.*/ * / * foo*.*
// The algorithm is recursive: each '*' tries every possible split point.
//   *            : any sequence of characters
//   *[...]       : any sequence of characters taken from a class:
//                  file/name (anything but '?' and '/'), path (anything but '?'),
//                  param (anything, only when chaine starts at a '?'),
//                  explicit lists/ranges such as *[A-Z,0-9], escaped
//                  characters such as *[\[] or *[\]], *[] (nothing may follow),
//                  *[<N] / *[>N] (size test against *size)
//   *(...)       : same classes, but matches exactly ONE character
//   --?--        : HTTrack specific: a pattern that ends where chaine has a
//                  '?' still matches (index.html?Choix=2)
// size      : optional; current size used by the *[<N] / *[>N] tests. On a
//             successful size test *size is overwritten with N.
// size_flag : optional; set to 1 when a size test was actually evaluated.
HTS_INLINE const char *strjoker(const char *chaine, const char *joker, LLint * size,
                          int *size_flag) {
  if (strnotempty(joker) == 0) {        // end of the pattern
    if (strnotempty(chaine) == 0)       // end of the string too: match
      return chaine;
    else if (chaine[0] == '?')
      return chaine;            // --?-- for index.html?Choix=2
    else
      return NULL;              // no match
  }
  // progress along the 'words' of the pattern:
  // a word is either a '*' (possibly with a class) or a literal sequence

  if (strcmp(joker, "*") == 0) {        // trailing '*': everything matches
    return chaine;
  }
  // first case: '*' or class wildcard *[..] / *(..)
  if (joker[0] == '*') {        // compare wildcard + rest (*toto/..)
    int jmp;                    // number of pattern characters used by this wildcard
    int cut = 0;                // set by *[]: no extra character allowed
    char pass[256];             // pass[c] != 0 <=> character c is accepted
    char LEFT = '[', RIGHT = ']';
    int unique = 0;             // 1 for *(..): match exactly one character

    switch (joker[1]) {
    case '[':
      LEFT = '[';
      RIGHT = ']';
      unique = 0;
      break;
    case '(':
      LEFT = '(';
      RIGHT = ')';
      unique = 1;
      break;
    }

    if ((joker[1] == LEFT) && (joker[2] != LEFT)) {     // class wildcard
      int i;

      for(i = 0; i < 256; i++)
        pass[i] = 0;

      // reserved class names
      if ((strfield(joker + 2, "file")) || (strfield(joker + 2, "name"))) {
        for(i = 0; i < 256; i++)
          pass[i] = 1;
        pass[(int) '?'] = 0;
        pass[(int) '/'] = 0;
        i = 2;
        {
          int len = (int) strlen(joker);

          while((joker[i] != RIGHT) && (joker[i]) && (i < len))
            i++;
        }
      } else if (strfield(joker + 2, "path")) {
        for(i = 0; i < 256; i++)
          pass[i] = 1;
        pass[(int) '?'] = 0;
        i = 2;
        {
          int len = (int) strlen(joker);

          while((joker[i] != RIGHT) && (joker[i]) && (i < len))
            i++;
        }
      } else if (strfield(joker + 2, "param")) {
        if (chaine[0] == '?') { // a parameter starts right here
          for(i = 0; i < 256; i++)
            pass[i] = 1;
        }                       // otherwise equivalent to 'nothing'
        i = 2;
        {
          int len = (int) strlen(joker);

          while((joker[i] != RIGHT) && (joker[i]) && (i < len))
            i++;
        }
      } else {
        // decode explicit classes such as *[A-Z,0-9]
        i = 2;
        if (joker[i] == RIGHT) {        // *[] means "nothing more after this"
          cut = 1;              // extra character forbidden
        } else {
          int len = (int) strlen(joker);

          // the bound test comes first so joker[] is never read past its NUL
          while((i < len) && (joker[i] != RIGHT) && (joker[i])) {
            if ((joker[i] == '<') || (joker[i] == '>')) {       // *[<10]
              int lsize = 0;

              i++;
              if (sscanf(joker + i, "%d", &lsize) == 1) {
                int lverdict;

                // no size available: the test cannot succeed
                if (size == NULL || *size < 0)
                  return NULL;
                if (size_flag)
                  *size_flag = 1;       /* the size test was used */
                if (joker[i - 1] == '<')
                  lverdict = (*size < lsize);
                else
                  lverdict = (*size > lsize);
                if (!lverdict)
                  return NULL;  // size test failed
                *size = lsize;
                return chaine;  // size test passed
              }
            } else if (joker[i + 1] == '-' && joker[i + 2] != '\0') {   // range, ex: *[A-Z]
              // a range needs an upper bound; "a-" at the very end of the
              // pattern is handled as two single characters instead, so i
              // never jumps past the terminating NUL
              if ((int) (unsigned char) joker[i + 2] >
                  (int) (unsigned char) joker[i]) {
                int j;

                for(j = (int) (unsigned char) joker[i];
                    j <= (int) (unsigned char) joker[i + 2]; j++)
                  pass[j] = 1;

              }
              i += 3;
            } else {            // single character, ex: *[ ]
              // escaped char, such as *[\[] or *[\]]: the backslash is the
              // CURRENT character; skip it and accept the one that follows
              if (joker[i] == '\\' && joker[i + 1] != 0) {
                i++;
              }
              pass[(int) (unsigned char) joker[i]] = 1;
              i++;
            }
            if ((joker[i] == ',') || (joker[i] == ';'))
              i++;
          }
        }
      }
      // amount to skip in the pattern (including the closing bracket, if any)
      jmp = i;
      if (joker[i])
        jmp++;

    } else {                    // plain '*': accept everything
      int i;

      for(i = 0; i < 256; i++)
        pass[i] = 1;
      jmp = 1;
    }

    {
      int i, max;
      const char *adr;

      // *[]: the string must end exactly here
      if (cut) {
        if (strnotempty(chaine))
          return NULL;          // characters left: no match
        else
          return chaine;        // ok
      }
      // matching loop (this is the expensive part):
      // pass[256] tells which characters the wildcard may consume

      // try with the wildcard consuming nothing (class* rather than class+)
      if (!unique) {
        if ((adr = strjoker(chaine, joker + jmp, size, size_flag))) {
          return adr;
        }
      }
      // try consuming 1, 2, ... accepted characters
      i = 0;
      if (!unique)
        max = (int) strlen(chaine);
      else                      /* *(a) only match a (not aaaaa) */
        max = 1;
      while(i < (int) max) {
        if (pass[(int) (unsigned char) chaine[i]]) {    // accepted character
          if ((adr = strjoker(chaine + i + 1, joker + jmp, size, size_flag))) {
            return adr;
          }
          i++;
        } else
          i = max + 2;          // rejected character: leave the loop
      }

      // every character was accepted: try the rest of the pattern at the end
      if (i != max + 2)
        if ((adr = strjoker(chaine + max, joker + jmp, size, size_flag)))
          return adr;

      return NULL;              // no match
    }

  } else {                      // compare literal word + rest (toto*..)
    if (strnotempty(chaine)) {
      int jmp = 0, ok = 1;

      // compare the literal prefix of the pattern with the start of chaine
      // (a NUL in chaine fails streql(), so chaine is never read past its end)
      while((joker[jmp] != '*') && (joker[jmp]) && (ok)) {
        // CI: replace streql by != for a case-sensitive comparison
        if (!streql(chaine[jmp], joker[jmp])) {
          ok = 0;               // mismatch
        }
        jmp++;
      }

      // prefix matched: continue with the rest
      if (ok) {
        if (strjoker(chaine + jmp, joker + jmp, size, size_flag))
          return chaine;        // return the address of the first letter
      }

    }
    return NULL;
  }                             // '*' or word

  return NULL;
}
示例#3
0
// Builds adr (address) and fil (path) from a link and the context it was
// taken from (origin_adr and origin_fil).
// [per the original note, adr and fil are 1 KB buffers]
// Returns:
//  0 : ok
// -1 : error
// -2 : unsupported protocol (ftp)
// NOTE(review): the "?" and absolute-path branches below append to fil without
// a combined length check; confirm the callers' buffers are large enough.
int ident_url_relatif(const char *lien,const char* origin_adr,const char* origin_fil,char* adr,char* fil) {
  int ok=0;
  int scheme=0;

  adr[0]='\0'; fil[0]='\0';    // clear output buffers

  // the link must not be empty
  if (strnotempty(lien)==0) return -1;    // error!

  // Scheme? (a run of letters immediately followed by ':')
  {
    const char* a=lien;
    while (isalpha((unsigned char)*a))
      a++;
    if (*a == ':')
      scheme=1;
  }

  // filter out unwanted links (mailto & co)
  // scheme+authority (//)
  if (
               (strfield(lien,"http://"))        // scheme+//
            || (strfield(lien,"file://"))   // scheme+//
            || (strncmp(lien,"//",2)==0)    // // without scheme (-> default)
       ) {
    if (ident_url_absolute(lien,adr,fil)==-1) {        
      ok=-1;    // URL error
    }
  }
  else if (strfield(lien,"ftp://")) {
    // Note: ftp:foobar.gif is not valid
    if (ftp_available()) {     // ftp supported
      if (ident_url_absolute(lien,adr,fil)==-1) {        
        ok=-1;    // URL error
      }
    } else {
      ok=-2;  // not supported
    }
#if HTS_USEMMS
	} else if (strfield(lien,"mms://")) {
		if (ident_url_absolute(lien,adr,fil)==-1) {        
			ok=-1;    // URL error
		}
#endif
#if HTS_USEOPENSSL
  } else if (strfield(lien,"https://")) {
    if (SSL_is_available) {
      // absolute https URL
      if (ident_url_absolute(lien,adr,fil)==-1) {        
        ok=-1;    // URL error
      }
    } else {
      ok=-1;
    }
#endif
  } else if ((scheme) && (
    (!strfield(lien,"http:"))
    && (!strfield(lien,"https:"))
    && (!strfield(lien,"ftp:"))
#if HTS_USEMMS
    && (!strfield(lien,"mms:"))
#endif
    )) {
    ok=-1;      // unknown scheme
  } else {    // this is a relative link
    // Build the complete URL from the current address
    // and, if needed, the current path.
    
    // copy the address (only when every input is shorter than HTS_URLMAXSIZE)
    if (((int) strlen(origin_adr)<HTS_URLMAXSIZE) && ((int) strlen(origin_fil)<HTS_URLMAXSIZE) && ((int) strlen(lien)<HTS_URLMAXSIZE)) {

      /* patch scheme if necessary ("scheme:" without "//" keeps the origin host) */
      if (strfield(lien,"http:")) {
        lien+=5;
        strcpybuff(adr, jump_protocol(origin_adr));    // same address; empty protocol (http)
      } else if (strfield(lien,"https:")) {
        lien+=6;
        strcpybuff(adr, "https://");   // same address, forced to https
        strcatbuff(adr, jump_protocol(origin_adr));
      } else if (strfield(lien,"ftp:")) {
        lien+=4;
        strcpybuff(adr, "ftp://");   // same address, forced to ftp
        strcatbuff(adr, jump_protocol(origin_adr));
#if HTS_USEMMS
      } else if (strfield(lien,"mms:")) {
        lien+=4;
        strcpybuff(adr, "mms://");   // same address, forced to mms
        strcatbuff(adr, jump_protocol(origin_adr));
#endif
      } else {
        strcpybuff(adr,origin_adr);    // same address, and same protocol if any
      }
      
      if (*lien!='/') {  // otherwise it is an absolute path
        if (*lien == '\0') {
          // empty link: same document as the origin
          strcpybuff(fil,origin_fil);
        } else if (*lien == '?') {     // example: a href="?page=2"
          // keep the origin path, replace its query string
          char* a;
          strcpybuff(fil,origin_fil);
          a=strchr(fil,'?');
          if (a) *a='\0';
          strcatbuff(fil,lien);
        } else {
          // find the last '/' of the origin path, ignoring its query string
          const char *a=strchr(origin_fil,'?');
          if (a == NULL) a=origin_fil+strlen(origin_fil);
          while((*a!='/') && ( a > origin_fil) ) a--;
          if (*a=='/') {    // ok, we have a '/'
            if ( (((int) (a - origin_fil))+1+strlen(lien)) < HTS_URLMAXSIZE) {
              // copy the directory part (strncpy does not terminate: done by hand below)
              strncpy(fil,origin_fil,((int) (a - origin_fil))+1);
              *(fil + ((int) (a - origin_fil))+1)='\0';
              
              // append the relative path
              if (((int) strlen(fil)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
                strcatbuff(fil,lien + ((*lien=='/')?1:0) );      
                // simplify the URL (../ sequences)
                fil_simplifie(fil);
              } else
                ok=-1;    // error
            } else {    // too long
              ok=-1;    // URL error
            }
          } else {    // no '/' in the origin path
            ok=-1;    // URL error
          }
        }
      } else { // absolute path
        // copy the path as is
        strcatbuff(fil,lien);      
        fil_simplifie(fil);
      }  // *lien!='/'
    } else
      ok=-1;
    
  }  // scheme tests (news: etc.)

  // lower-case the address part (A-Z only), starting at jump_identification(adr)
  {
    char *a=jump_identification(adr);
    while(*a) {
      if ((*a>='A') && (*a<='Z'))
        *a+='a'-'A';       
      a++;
    }
  }
  
  return ok;
}
示例#4
0
/* Progress callback, called on each HTTrack loop iteration.
 * Copies the non-negative statistics it receives into the static SInfo
 * snapshot and, when use_show is set, redraws the status screen at most once
 * every 100 units of mtime_local() (presumably milliseconds - TODO confirm).
 * When back_index >= 0 it also lists up to NStatsBuffer active transfers from
 * back[] (receiving first, then connecting/requesting, then ready ones).
 * Always returns 1.
 * NOTE(review): (back_index + _i) % back_max assumes back_max > 0 whenever
 * back_index >= 0 - confirm with the caller. */
static int __cdecl htsshow_loop(t_hts_callbackarg *carg, httrackp *opt,
	lien_back* back, int back_max, int back_index, int lien_n,
	int lien_tot, int stat_time, hts_stat_struct* stats) { // called on each HTTrack loop iteration
	static TStamp prev_mytime = 0; /* ok */
	static t_InpInfo SInfo; /* ok */
	//
	TStamp mytime;
	long int rate = 0;
	char st[256];
	// local copies of the statistics; -1 means "not available"
	int stat_written = -1;
	int stat_updated = -1;
	int stat_errors = -1;
	int stat_warnings = -1;
	int stat_infos = -1;
	int nbk = -1;
	LLint nb = -1;
	int stat_nsocket = -1;
	LLint stat_bytes = -1;
	LLint stat_bytes_recv = -1;
	int irate = -1;
	if (stats) {
		stat_written = stats->stat_files;
		stat_updated = stats->stat_updated_files;
		stat_errors = stats->stat_errors;
		stat_warnings = stats->stat_warnings;
		stat_infos = stats->stat_infos;
		nbk = stats->nbk;
		stat_nsocket = stats->stat_nsocket;
		irate = (int) stats->rate;
		nb = stats->nb;
		stat_bytes = stats->nb;
		stat_bytes_recv = stats->HTS_TOTAL_RECV;
	}

	// display disabled: nothing to do
	if (!use_show)
		return 1;

	mytime = mtime_local();
	// average rate = bytes received / elapsed time
	if ((stat_time > 0) && (stat_bytes_recv > 0))
		rate = (int) (stat_bytes_recv / stat_time);
	else
		rate = 0; // no information

	/* Infos: update the snapshot, keeping the previous value when the new one is unavailable */
	if (stat_bytes >= 0)
		SInfo.stat_bytes = stat_bytes; // bytes
	if (stat_time >= 0)
		SInfo.stat_time = stat_time; // time
	if (lien_tot >= 0)
		SInfo.lien_tot = lien_tot; // number of links
	if (lien_n >= 0)
		SInfo.lien_n = lien_n; // scanned
	SInfo.stat_nsocket = stat_nsocket; // socks (stored even when -1)
	if (rate > 0)
		SInfo.rate = rate; // rate
	if (irate >= 0)
		SInfo.irate = irate; // irate
	if (SInfo.irate < 0)
		SInfo.irate = SInfo.rate;
	if (nbk >= 0)
		SInfo.stat_back = nbk;
	if (stat_written >= 0)
		SInfo.stat_written = stat_written;
	if (stat_updated >= 0)
		SInfo.stat_updated = stat_updated;
	if (stat_errors >= 0)
		SInfo.stat_errors = stat_errors;
	if (stat_warnings >= 0)
		SInfo.stat_warnings = stat_warnings;
	if (stat_infos >= 0)
		SInfo.stat_infos = stat_infos;

	// redraw only if more than 100 time units elapsed, or if the clock went backwards
	if (((mytime - prev_mytime) > 100) || ((mytime - prev_mytime) < 0)) {
		strc_int2bytes2 strc, strc2, strc3;
		prev_mytime = mytime;

		st[0] = '\0';
		qsec2str(st, stat_time);
		vt_home();
		// four-line, two-column summary; the arguments below follow the order of the % fields
		printf(VT_GOTOXY("1","1")
		VT_CLREOL
		STYLE_STATTEXT "Bytes saved:"
		STYLE_STATVALUES " \t%s"
		"\t"
		VT_CLREOL
		VT_GOTOXY("40","1")
		STYLE_STATTEXT "Links scanned:"
		STYLE_STATVALUES " \t%d/%d (+%d)"
		VT_CLREOL"\n"VT_CLREOL
		VT_GOTOXY("1","2")
		STYLE_STATTEXT "Time:"
		" \t"
		STYLE_STATVALUES "%s"
		"\t"
		VT_CLREOL
		VT_GOTOXY("40","2")
		STYLE_STATTEXT "Files written:"
		" \t"
		STYLE_STATVALUES "%d"
		VT_CLREOL"\n"VT_CLREOL
		VT_GOTOXY("1","3")
		STYLE_STATTEXT "Transfer rate:"
		" \t"
		STYLE_STATVALUES "%s (%s)"
		"\t"
		VT_CLREOL
		VT_GOTOXY("40","3")
		STYLE_STATTEXT "Files updated:"
		" \t"
		STYLE_STATVALUES "%d"
		VT_CLREOL"\n"VT_CLREOL
		VT_GOTOXY("1","4")
		STYLE_STATTEXT "Active connections:"
		" \t"
		STYLE_STATVALUES "%d"
		"\t"
		VT_CLREOL
		VT_GOTOXY("40","4")
		STYLE_STATTEXT "Errors:"
		STYLE_STATVALUES " \t"
		STYLE_STATVALUES "%d"
		VT_CLREOL"\n"
		STYLE_STATRESET,
		/* */
		(char*) int2bytes(&strc, SInfo.stat_bytes), (int) lien_n,
				(int) SInfo.lien_tot, (int) nbk, (char*) st,
				(int) SInfo.stat_written,
				(char*) int2bytessec(&strc2, SInfo.irate),
				(char*) int2bytessec(&strc3, SInfo.rate),
				(int) SInfo.stat_updated, (int) SInfo.stat_nsocket,
				(int) SInfo.stat_errors
		/* */
		);

		// walk the link registry
		if (back_index >= 0) { // only if an index was passed
			int j, k;
			int index = 0; // number of StatsBuffer entries filled so far
			int ok = 0; // set when the current link belongs to the current pass
			int l; // length of the displayed URL
			//
			t_StatsBuffer StatsBuffer[NStatsBuffer];

			// clear every display slot
			{
				int i;
				for (i = 0; i < NStatsBuffer; i++) {
					strcpybuff(StatsBuffer[i].state,"");
					strcpybuff(StatsBuffer[i].name,"");
					strcpybuff(StatsBuffer[i].file,"");
					strcpybuff(StatsBuffer[i].url_sav,"");
					StatsBuffer[i].back = 0;
					StatsBuffer[i].size = 0;
					StatsBuffer[i].sizetot = 0;
				}
			}
			for (k = 0; k < 2; k++) { // 0: current link, 1: other links
				for (j = 0; (j < 3) && (index < NStatsBuffer); j++) { // priority pass
					int _i;
					// k == 0 visits offset 0 only; k == 1 visits offsets 1 .. back_max-1
					for (_i = 0 + k; (_i < max(back_max*k,1)) && (index
							< NStatsBuffer); _i++) { // link number
						int i = (back_index + _i) % back_max; // start with the "first" (current) one
						if (back[i].status >= 0) { // means "active link"
							// int ok=0;  // OPTI
							ok = 0;
							switch (j) {
							case 0: // highest priority: data being received
								if ((back[i].status > 0) && (back[i].status
										< 99)) {
									strcpybuff(StatsBuffer[index].state,"receive");
									ok = 1;
								}
								break;
							case 1: // then: connections being set up
								if (back[i].status == STATUS_WAIT_HEADERS) {
									strcpybuff(StatsBuffer[index].state,"request");
									ok = 1;
								} else if (back[i].status == STATUS_CONNECTING) {
									strcpybuff(StatsBuffer[index].state,"connect");
									ok = 1;
								} else if (back[i].status == STATUS_WAIT_DNS) {
									strcpybuff(StatsBuffer[index].state,"search");
									ok = 1;
								} else if (back[i].status
										== STATUS_FTP_TRANSFER) { // FTP transfer in progress
									// NOTE(review): unbounded sprintf; confirm state[] can hold "ftp: " + info
									sprintf(StatsBuffer[index].state,
											"ftp: %s", back[i].info);
									ok = 1;
								}
								break;
							default: // last: finished transfers
								if (back[i].status == STATUS_READY) { // ready
									if ((back[i].r.statuscode == 200)) {
										strcpybuff(StatsBuffer[index].state,"ready");
										ok = 1;
									} else if ((back[i].r.statuscode >= 100)
											&& (back[i].r.statuscode <= 599)) {
										// other HTTP status: text produced by infostatuscode()
										char tempo[256];
										tempo[0] = '\0';
										infostatuscode(tempo,
												back[i].r.statuscode);
										strcpybuff(StatsBuffer[index].state,tempo);
										ok = 1;
									} else {
										strcpybuff(StatsBuffer[index].state,"error");
										ok = 1;
									}
								}
								break;
							}

							if (ok) {
								char BIGSTK s[HTS_URLMAXSIZE * 2];
								//
								StatsBuffer[index].back = i; // index for more details
								//
								s[0] = '\0';
								strcpybuff(StatsBuffer[index].url_sav,back[i].url_sav); // for cancel
								// build "host/path" ("localhost" for file:// links)
								if (strcmp(back[i].url_adr, "file://"))
									strcatbuff(s,back[i].url_adr);
								else
									strcatbuff(s,"localhost");
								if (back[i].url_fil[0] != '/')
									strcatbuff(s,"/");
								strcatbuff(s,back[i].url_fil);

								// split at the last '/': file part goes to .file, the rest stays in s
								StatsBuffer[index].file[0] = '\0';
								{
									char* a = strrchr(s, '/');
									if (a) {
										strncatbuff(StatsBuffer[index].file,a,200);
										*a = '\0';
									}
								}

								if ((l = (int) strlen(s)) < MAX_LEN_INPROGRESS)
									strcpybuff(StatsBuffer[index].name,s);
								else {
									// too long: keep the beginning and the end, "..." in between
									StatsBuffer[index].name[0] = '\0';
									strncatbuff(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2);
									strcatbuff(StatsBuffer[index].name,"...");
									strcatbuff(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2);
								}

								if (back[i].r.totalsize > 0) { // size known in advance
									StatsBuffer[index].sizetot
											= back[i].r.totalsize;
									StatsBuffer[index].size = back[i].r.size;
								} else { // size not known in advance
									if (back[i].status == STATUS_READY) { // ready
										StatsBuffer[index].sizetot
												= back[i].r.size;
										StatsBuffer[index].size
												= back[i].r.size;
									} else {
										// unknown total: show progress modulo 8192 bytes
										StatsBuffer[index].sizetot = 8192;
										StatsBuffer[index].size
												= (back[i].r.size % 8192);
									}
								}
								index++;
							}
						}
					}
				}
			}

			/* LF */
			printf("%s\n", VT_CLREOL);

			/* Display current job */
			{
				int parsing = 0;
				printf("Current job: ");
				// hts_is_parsing() result is printed as a percentage when non-zero
				if (!(parsing = hts_is_parsing(opt, -1)))
					printf("receiving files");
				else {
					switch (hts_is_testing(opt)) {
					case 0:
						printf("parsing HTML file (%d%%)", parsing);
						break;
					case 1:
						printf("parsing HTML file: testing links (%d%%)",
								parsing);
						break;
					case 2:
						printf("purging files");
						break;
					case 3:
						printf("loading cache");
						break;
					case 4:
						printf("waiting (scheduler)");
						break;
					case 5:
						printf("waiting (throttle)");
						break;
					}
				}
				printf("%s\n", VT_CLREOL);
			}

			/* Display background jobs (one line per slot, empty slots print a blank line) */
			{
				int i;
				for (i = 0; i < NStatsBuffer; i++) {
					if (strnotempty(StatsBuffer[i].state)) {
						printf(VT_CLREOL" %s - \t%s%s \t%s / \t%s",
								StatsBuffer[i].state, StatsBuffer[i].name,
								StatsBuffer[i].file,
								int2bytes(&strc, StatsBuffer[i].size),
								int2bytes(&strc2, StatsBuffer[i].sizetot));
					}
					printf("%s\n", VT_CLREOL);
				}
			}

		}

	}

	return 1;
}
示例#5
0
文件: main.c 项目: rainkid/httrack
/* Append src to the NUL-terminated string held in dst (cap bytes in total),
 * truncating instead of writing past the end of the buffer. */
static void arg_init_append(char *dst, size_t cap, const char *src) {
	size_t used = strlen(dst);

	if (src == NULL || used + 1 >= cap)
		return;
	snprintf(dst + used, cap - used, "%s", src);
}

/* Build the httrack command line for a project and split it into proj->argv.
 *
 * Fills in defaults (cache, conns, timeout, savepath, proxy_port), sets
 * proj->errorno to 1 when the project name is empty and to 2 when the URL
 * list is empty, then stores "httrack" in argv[0] and one argument per
 * space-separated token in argv[1..argc-1]. Spaces inside double quotes do not
 * split; the quote characters themselves are left in the argument text.
 *
 * The arguments point into a static buffer: they stay valid after this
 * function returns, but are overwritten by the next call (not reentrant).
 * NOTE(review): the capacity of proj->argv and proj->savepath is not visible
 * here, so writes to them are not bounded - confirm against the struct. */
static void arg_init(struct hts_proj *proj) {
	/* static: proj->argv[] must not point into a dead stack frame */
	static char cmd[4096];
	char str[256] = { 0x00 };
	char *a;
	char *p;
	int in_quotes = 0;
	int at_token_start = 1;

	cmd[0] = '\0';

	if (proj->cache == -1) proj->cache = 2; /* default cache mode */
	if (proj->conns == -1) proj->conns = 10; /* max connections per second */
	if (proj->timeout == -1) proj->timeout = 10;

	if (strnotempty(proj->name) == 0) proj->errorno = 1;
	if (strnotempty(proj->urls) == 0) proj->errorno = 2;

	/* URLs may be separated by commas or tabs: normalize to spaces */
	while ((a = strchr(proj->urls, ','))) *a = ' ';
	while ((a = strchr(proj->urls, '\t'))) *a = ' ';
	arg_init_append(cmd, sizeof(cmd), proj->urls);
	arg_init_append(cmd, sizeof(cmd), " ");

	/* connections limit */
	snprintf(str, sizeof(str), "-%%c%d ", proj->conns);
	arg_init_append(cmd, sizeof(cmd), str);

	/* timeout */
	snprintf(str, sizeof(str), "-T%d ", proj->timeout);
	arg_init_append(cmd, sizeof(cmd), str);

	/* default save path: $HOME/websites/ (current directory if HOME is unset) */
	if (strnotempty(proj->savepath) == 0) {
		const char *home = getenv("HOME");

		strcat(proj->savepath, (home != NULL) ? home : ".");
		strcat(proj->savepath, "/websites/");
	}

	/* make sure the save path ends with a separator */
	if (strnotempty(proj->savepath)) {
		const char last = proj->savepath[strlen(proj->savepath) - 1];

		if ((last != '/') && (last != '\\')) {
			strcat(proj->savepath, "/");
		}
	}

	arg_init_append(cmd, sizeof(cmd), "-q ");

	if (strnotempty(proj->linklist)) {
		arg_init_append(cmd, sizeof(cmd), "-%L");
		arg_init_append(cmd, sizeof(cmd), proj->linklist);
		arg_init_append(cmd, sizeof(cmd), " ");
	}

	snprintf(str, sizeof(str), "-C%d ", proj->cache);
	arg_init_append(cmd, sizeof(cmd), str);

	switch (proj->action) {
	case ACTION_ALL_SITES:
		arg_init_append(cmd, sizeof(cmd), "-w ");
		break;
	case ACTION_ONLY_FILES:
		arg_init_append(cmd, sizeof(cmd), "-g ");
		break;
	case ACTION_WITH_CACHE:
		arg_init_append(cmd, sizeof(cmd), "-i ");
		break;
	case ACTION_FIST_LEVEL:
		arg_init_append(cmd, sizeof(cmd), "-Y ");
		break;
	}

	arg_init_append(cmd, sizeof(cmd), " --path \"");
	arg_init_append(cmd, sizeof(cmd), proj->savepath);
	arg_init_append(cmd, sizeof(cmd), proj->name);
	arg_init_append(cmd, sizeof(cmd), "\" ");

	snprintf(str, sizeof(str), "-p%d ", proj->priority);
	arg_init_append(cmd, sizeof(cmd), str);

	if (strnotempty(proj->proxy_port) == 0) {
		sprintf(proj->proxy_port,"%s", "8080");
	}

	if (proj->retries) {
		snprintf(str, sizeof(str), "-R%d ", proj->retries);
		arg_init_append(cmd, sizeof(cmd), str);
	}

	if (strnotempty(proj->proxy)) {
		arg_init_append(cmd, sizeof(cmd), "--proxy ");
		arg_init_append(cmd, sizeof(cmd), proj->proxy);
		arg_init_append(cmd, sizeof(cmd), ":");
		arg_init_append(cmd, sizeof(cmd), proj->proxy_port);
		/* separator, so that a following option is not glued to the port */
		arg_init_append(cmd, sizeof(cmd), " ");
	}

	if (proj->depth > 0) {
		snprintf(str, sizeof(str), "-r%d ", proj->depth);
		arg_init_append(cmd, sizeof(cmd), str);
	}

	fprintf(stderr, "%s\n", cmd);

	/* Split in place: every unquoted space ends a token; runs of spaces and
	 * a trailing space do not produce empty arguments. */
	proj->argv[0] = "httrack";
	proj->argc = 1;
	for (p = cmd; *p != '\0'; p++) {
		if (*p == '\"')
			in_quotes = !in_quotes;
		if (*p == ' ' && !in_quotes) {
			*p = '\0';
			at_token_start = 1;
		} else if (at_token_start) {
			proj->argv[proj->argc++] = p;
			at_token_start = 0;
		}
	}
}
示例#6
0
// Load cookies from <fpath><name> (cookies.txt) into cookie.
// On Windows only, first merge the *@*.txt files found in fpath (copied IE
// cookies); a file that contributed at least one cookie is then removed.
// Returns 0 when the cookie file could be opened, non-zero (-1) otherwise.
int cookie_load(t_cookie * cookie, const char *fpath, const char *name) {
  char catbuff[CATBUFF_SIZE];
  char buffer[8192];

  // First merge IE cookies, if any
#ifdef _WIN32
  {
    WIN32_FIND_DATAA entry;
    HANDLE search;
    char pattern[MAX_PATH + 32];

    strcpybuff(pattern, fpath);
    strcatbuff(pattern, "*@*.txt");
    search = FindFirstFileA((char *) pattern, &entry);
    if (search != INVALID_HANDLE_VALUE) {
      do {
        FILE *in;
        char ck_name[256];
        char ck_value[1000];
        char origin[512];       // "domain/path" line of the IE record
        char dummy[512];
        char domain[256];       // cookie domain (.netscape.com)
        char path[256];         // cookie path (/)
        int merged = 0;

        // skip directories and system files
        if (entry.dwFileAttributes &
            (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_SYSTEM))
          continue;

        in = fopen(fconcat(catbuff, fpath, entry.cFileName), "rb");
        if (in == NULL)
          continue;

        // Read all cookies: name, value, domain/path, then 6 ignored lines
        while(!feof(in)) {
          int skipped;

          ck_name[0] = '\0';
          ck_value[0] = '\0';
          origin[0] = '\0';
          dummy[0] = '\0';
          domain[0] = '\0';
          path[0] = '\0';

          linput(in, ck_name, 250);
          if (feof(in))
            break;
          linput(in, ck_value, 250);
          if (feof(in))
            break;
          linput(in, origin, 500);
          /* Read 6 other useless values */
          for(skipped = 0; !feof(in) && skipped < 6; skipped++) {
            linput(in, dummy, 500);
          }
          if (strnotempty(ck_name) && strnotempty(ck_value)
              && strnotempty(origin)
              && ident_url_absolute(origin, domain, path) >= 0) {
            cookie_add(cookie, ck_name, ck_value, domain, path);
            merged = 1;
          }
        }
        fclose(in);
        if (merged)
          remove(fconcat(catbuff, fpath, entry.cFileName));
      } while(FindNextFileA(search, &entry));
      FindClose(search);
    }
  }
#endif

  // Then, cookies.txt
  {
    char BIGSTK line[8192];
    FILE *fp = fopen(fconcat(catbuff, fpath, name), "rb");

    if (fp == NULL)
      return -1;

    // stop at end of file or once the cookie store reached its maximum length
    while((!feof(fp)) && (((int) strlen(cookie->data)) < cookie->max_len)) {
      char domain[256];         // cookie domain (.netscape.com)
      char path[256];           // cookie path (/)
      char ck_name[1024];       // cookie name (MYCOOK)
      char BIGSTK ck_value[8192];       // cookie value (ID=toto,S=1234)

      rawlinput(fp, line, 8100);
      if (!strnotempty(line))
        continue;               // empty line
      if (!(strlen(line) < 8000))
        continue;               // line too long
      if (line[0] == '#')
        continue;               // comment

      // fields 0, 2, 5 and 6 of the line, as extracted by cookie_get()
      strcpybuff(domain, cookie_get(buffer, line, 0));  // host
      strcpybuff(path, cookie_get(buffer, line, 2));    // path
      strcpybuff(ck_name, cookie_get(buffer, line, 5)); // name
      strcpybuff(ck_value, cookie_get(buffer, line, 6));        // value
#if DEBUG_COOK
      printf("%s\n", line);
#endif
      cookie_add(cookie, ck_name, ck_value, domain, path);
    }
    fclose(fp);
    return 0;
  }
}
示例#7
0
// catch_url
// Accepts one connection on the listening socket soc, reads one HTTP request
// from it, answers with CATCH_RESPONSE and closes the accepted connection
// (the listening socket itself is left open).
// returns 0 if error, 1 if a request was captured
// url: buffer where URL must be stored - or ip:port in case of failure
// method: buffer receiving the request method, upper-cased
// data: 32Kb buffer receiving the request line, the headers and the beginning
//       of the body (if any); writes are kept within those 32Kb
// NOTE(review): the sizes of url and method are not visible here, so the
// first two sscanf fields cannot be bounded in this function - confirm that
// callers pass buffers of at least 1000 bytes (the request line limit).
HTSEXT_API int catch_url(T_SOC soc, char *url, char *method, char *data) {
  enum { DATA_CAPACITY = 32768 };       // documented size of data
  int retour = 0;

  // connection (accept)
  if (soc != INVALID_SOCKET) {
    T_SOC soc2;

    // NOTE(review): spins for as long as accept() fails
    while((soc2 = (T_SOC) accept(soc, NULL, NULL)) == INVALID_SOCKET) ;
    // from here on, soc is the accepted connection
    soc = soc2;
    /* INFOS: default url to the peer "ip:port" */
    {
      SOCaddr server2;
      SOClen len = SOCaddr_capacity(server2);

      if (getpeername(soc, &SOCaddr_sockaddr(server2), &len) == 0) {
        char dot[256 + 2];

        SOCaddr_inetntoa(dot, sizeof(dot), server2);
        sprintf(url, "%s:%d", dot, ntohs(SOCaddr_sinport(server2)));
      }
    }
    /* INFOS */

    // receive the request
    if (soc != INVALID_SOCKET) {
      char line[1000];
      char protocol[256];

      line[0] = protocol[0] = '\0';
      //
      socinput(soc, line, 1000);
      if (strnotempty(line)) {
        // protocol is a 256-byte local: bound the field (the peer is untrusted)
        if (sscanf(line, "%s %s %255s", method, url, protocol) == 3) {
          lien_adrfil af;
          size_t i;
          int r = 0;

          af.adr[0] = af.fil[0] = '\0';
          // upper-case the method
          for(i = 0; method[i] != '\0'; i++) {
            if ((method[i] >= 'a') && (method[i] <= 'z'))
              method[i] -= ('a' - 'A');
          }
          // split the link into address and path
          if (ident_url_absolute(url, &af) >= 0) {
            // header processing
            char BIGSTK loc[HTS_URLMAXSIZE * 2];
            htsblk blkretour;
            size_t used;        // number of characters currently stored in data

            hts_init_htsblk(&blkretour);
            blkretour.location = loc;   // if not NULL, receives the real address in case of "moved xx"
            // request line, rebuilt with the path only
            snprintf(data, DATA_CAPACITY, "%s %s %s\r\n", method, af.fil,
                     protocol);
            used = strlen(data);
            // read the remaining headers, up to and including the empty line
            while(strnotempty(line)) {
              size_t n;

              socinput(soc, line, 1000);
              treathead(NULL, NULL, NULL, &blkretour, line);    // process header
              n = strlen(line);
              // Append "line\r\n" only if it fits; a non-empty header must
              // also leave room for the final blank line. Headers that do
              // not fit are still parsed above but not stored.
              if (used + n + 2 + ((n > 0) ? 2 : 0) < (size_t) DATA_CAPACITY) {
                memcpy(data + used, line, n);
                memcpy(data + used + n, "\r\n", 3);     // includes the NUL
                used += n + 2;
              }
            }
            // no extra CR/LF needed: the empty line above already added it
            if (blkretour.totalsize > 0) {
              int len = (int) min(blkretour.totalsize, 32000);
              int pos = (int) used;
              const int room = DATA_CAPACITY - 1 - pos;

              // never read more than what is left in data (NUL included)
              if (len > room)
                len = room;

              // copy the remainder (request body, if any)
              while((len > 0)
                    && ((r = recv(soc, (char *) data + pos, len, 0)) > 0)) {
                pos += r;
                len -= r;
                data[pos] = '\0';       // keep data NUL-terminated
              }
            }
            // send the response page
            sprintf(line, CATCH_RESPONSE);
            send(soc, line, (int) strlen(line), 0);
            // OK!
            retour = 1;
          }
        }
      }                         // otherwise: error
    }
  }
  if (soc != INVALID_SOCKET) {
#ifdef _WIN32
    closesocket(soc);
#else
    close(soc);
#endif
  }
  return retour;
}
示例#8
0
/*
 * Module "parse" callback for Java class files.
 *
 * If str->wrapper_name does not designate this module (libName), the call is
 * forwarded to the previous callback in the chain when there is one.
 * Otherwise, when detect_mime() accepts the file, the function opens
 * str->filename, checks the 0xCAFEBABE magic, loads header.count table
 * entries through readtable(), and registers "<name>.class" through
 * str->addLink() for every HTS_CLASS entry whose target is not one of the two
 * indices read after the table (Class, SClass), is not an array name
 * (leading '[') and does not contain "java/".
 *
 * Returns 1 on success, 0 on error (str->err_msg is then filled), or the
 * value returned by the previous callback when the call is forwarded.
 */
static int hts_parse_java(t_hts_callbackarg * carg, httrackp * opt,
                          htsmoduleStruct * str) {
  /* The wrapper_name member has changed: not for us anymore */
  if (str->wrapper_name == NULL || strcmp(str->wrapper_name, libName) != 0) {
    /* Call parent functions if multiple callbacks are chained. */
    if (CALLBACKARG_PREV_FUN(carg, parse) != NULL) {
      return CALLBACKARG_PREV_FUN(carg, parse) (CALLBACKARG_PREV_CARG(carg),
                                                opt, str);
    }
    strcpy(str->err_msg,
           "unexpected error: bad wrapper_name and no previous wrapper");
    return 0;                   /* Unexpected error */
  } else {
    if (detect_mime(str)) {

      /* (Legacy code) */
      char catbuff[CATBUFF_SIZE];
      FILE *fpout;
      JAVA_HEADER header;
      RESP_STRUCT *tab;
      const char *file = str->filename;

      str->relativeToHtmlLink = 1;

#if JAVADEBUG
      printf("fopen\n");
#endif
      if ((fpout = FOPEN(fconv(catbuff, sizeof(catbuff), file), "r+b")) == NULL) {
        /* NOTE(review): err_msg capacity is not visible here; a very long
           file name could overrun it -- confirm against the declaration. */
        sprintf(str->err_msg, "Unable to open file %s", file);
        return 0;               // an error occurred
      }
#if JAVADEBUG
      printf("fread\n");
#endif
      /* Only the first 10 bytes of the header are read from the file */
      if (fread(&header, 1, 10, fpout) != 10) { // incomplete header
        fclose(fpout);
        sprintf(str->err_msg, "File header too small (file len = " LLintP ")",
                (LLint) fsize(file));
        return 0;
      }
#if JAVADEBUG
      printf("header\n");
#endif
      // check the header (fields are byte-swapped first when needed)
      if (reverse_endian()) {
        header.magic = hts_swap32(header.magic);
        header.count = hts_swap16(header.count);
      }
      if (header.magic != 0xCAFEBABE) {
        sprintf(str->err_msg, "non java file");
        fclose(fpout);
        return 0;
      }

      tab = (RESP_STRUCT *) calloc(header.count, sizeof(RESP_STRUCT));
      if (!tab) {
        /* Report the size of the whole table, not of a single element */
        sprintf(str->err_msg, "Unable to alloc %d bytes",
                (int) (header.count * sizeof(RESP_STRUCT)));
        fclose(fpout);
        return 0;               // error
      }
#if JAVADEBUG
      printf("calchead\n");
#endif
      {
        int i;

        /* Entry 0 is never read; filling starts at index 1 */
        for(i = 1; i < header.count; i++) {
          int err = 0;

          tab[i] = readtable(str, fpout, tab[i], &err);
          if (!err) {
            /* long and double entries take two slots */
            if ((tab[i].type == HTS_LONG) || (tab[i].type == HTS_DOUBLE))
              i++;
          } else {              // readtable reported an error
            if (strnotempty(str->err_msg) == 0)
              strcpy(str->err_msg, "Internal readtable error");
            free(tab);
            fclose(fpout);
            return 0;
          }
        }

      }

#if JAVADEBUG
      printf("addfiles\n");
#endif
      {
        //unsigned int acess;
        unsigned int Class;
        unsigned int SClass;
        int i;

        //acess = readshort(fpout);
        Class = readshort(fpout);
        SClass = readshort(fpout);

        for(i = 1; i < header.count; i++) {

          if (tab[i].type == HTS_CLASS) {

            if ((tab[i].index1 < header.count) && (tab[i].index1 >= 0)) {

              if ((tab[i].index1 != SClass) && (tab[i].index1 != Class)
                  && (tab[tab[i].index1].name[0] != '[')) {

                if (!strstr(tab[tab[i].index1].name, "java/")) {
                  char BIGSTK tempo[1024];

                  /* Bounded formatting: the name plus ".class" must never
                     overrun tempo (an over-long name is truncated). */
                  snprintf(tempo, sizeof(tempo), "%s.class",
                           tab[tab[i].index1].name);
#if JAVADEBUG
                  printf("add %s\n", tempo);
#endif
                  if (tab[tab[i].index1].file_position >= 0)
                    str->addLink(str, tempo);   /* tab[tab[i].index1].file_position */
                }

              }
            } else {
              i = header.count; // out-of-range index: leave the loop
            }
          }

        }
      }

#if JAVADEBUG
      printf("end\n");
#endif
      free(tab);
      fclose(fpout);
      return 1;

    } else {
      strcpy(str->err_msg, "bad MIME type");
    }
  }
  return 0;                     /* Error */
}
示例#9
0
文件: htshelp.c 项目: ahua/c
/*
 * Interactive command-line wizard.
 *
 * Prompts on stdin for a project name, base path, URLs, action, proxy,
 * wildcards and additional options, builds the equivalent httrack command
 * line, shows it, and (after confirmation) splits it into an argv[] array
 * that is handed to hts_main().
 *
 * opt is only used to print the version information banner.
 * All working buffers are heap-allocated and released on every exit path.
 */
void help_wizard(httrackp* opt) {
  /* Number of slots in argv[]; the splitter below never writes past it */
  const int argv_max = 256;
  char* urls = (char*) malloct(HTS_URLMAXSIZE*2);
  char* mainpath = (char*) malloct(256);
  char* projname = (char*) malloct(256);
  char* stropt = (char*) malloct(2048);     // short options
  char* stropt2 = (char*) malloct(2048);    // long options
  char* strwild = (char*) malloct(2048);    // wildcards
  char* cmd = (char*) malloct(4096);
  /* 512 bytes: must hold a 250-char proxy host, ':' and a 250-char port */
  char* str = (char*) malloct(512);
  char** argv = (char**) malloct(argv_max * sizeof(char*));
  //
  char* a;
  //
  if (urls == NULL || mainpath == NULL || projname == NULL || stropt == NULL 
    || stropt2 == NULL || strwild == NULL || cmd == NULL || str == NULL || argv == NULL) {
    fprintf(stderr, "* memory exhausted in %s, line %d\n", __FILE__, __LINE__);
    goto cleanup;               /* release whatever was allocated */
  }
  urls[0] = mainpath[0] = projname[0] = stropt[0] = stropt2[0] = strwild[0] = cmd[0] = str[0] = '\0';
  //
  strcpybuff(stropt,"-");
  //
  
  printf("\n");
  printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"%s\n", hts_get_version_info(opt));
  printf("Copyright (C) Xavier Roche and other contributors\n");
#ifdef _WIN32
  printf("Note: You are running the commandline version,\n");
  printf("run 'WinHTTrack.exe' to get the GUI version.\n");
#endif
#ifdef HTTRACK_AFF_WARNING
  printf("NOTE: "HTTRACK_AFF_WARNING"\n");
#endif
#ifdef HTS_PLATFORM_NAME
#if USE_BEGINTHREAD
  printf("[compiled: "HTS_PLATFORM_NAME" - MT]\n");
#else
  printf("[compiled: "HTS_PLATFORM_NAME"]\n");
#endif
#endif
  printf("To see the option list, enter a blank line or try httrack --help\n");
  //
  // Project name (asked again, after showing the help, until non-empty)
  while(strnotempty(projname)==0) {
    printf("\n");
    printf("Enter project name :");
    fflush(stdout);
    linput(stdin,projname,250);
    if (strnotempty(projname)==0)
      help("httrack",1);
  }
  //
  // Path
  if (strnotempty(hts_gethome()))
    printf("\nBase path (return=%s/websites/) :",hts_gethome());
  else
    printf("\nBase path (return=current directory) :");
  linput(stdin,str,250);
  if (!strnotempty(str)) {
    strcatbuff(str,hts_gethome());
    strcatbuff(str,"/websites/");
  }
  // Make sure the base path ends with a separator
  if (strnotempty(str))
  if ((str[strlen(str)-1]!='/') && (str[strlen(str)-1]!='\\'))
    strcatbuff(str,"/");
  strcatbuff(stropt2,"-O \""); strcatbuff(stropt2,str); strcatbuff(stropt2,projname); strcatbuff(stropt2,"\" ");
  // Create a top-level index.html if not already done
  make_empty_index(str);
  //
  printf("\n");
  printf("Enter URLs (separated by commas or blank spaces) :");
  fflush(stdout);
  linput(stdin,urls,250);
  if (strnotempty(urls)) {
    // Commas and tabs become plain spaces
    while( (a=strchr(urls,',')) )  *a=' ';
    while( (a=strchr(urls,'\t')) ) *a=' ';
    
    // Action
    printf("\nAction:\n");
    switch(help_query("Mirror Web Site(s)|Mirror Web Site(s) with Wizard|Just Get Files Indicated|Mirror ALL links in URLs (Multiple Mirror)|Test Links In URLs (Bookmark Test)|Update/Continue a Mirror",1)) {
    case 1: break; 
    case 2: strcatbuff(stropt,"W"); break;
    case 3: strcatbuff(stropt2,"--get "); break;
    case 4: strcatbuff(stropt2,"--mirrorlinks "); break;
    case 5: strcatbuff(stropt2,"--testlinks "); break;
    case 6: strcatbuff(stropt2,"--update "); break;
    case 0: goto cleanup;       /* leave the wizard without leaking buffers */
    default: break;
    }
      
    // Proxy
    printf("\nProxy (return=none) :");
    linput(stdin,str,250);
    if (strnotempty(str)) {
      while( (a=strchr(str,' ')) ) *a=':';    // "host port" -> "host:port"
      if (!strchr(jump_identification(str),':')) {
        char str2[256];
        printf("\nProxy port (return=8080) :");
        linput(stdin,str2,250);
        strcatbuff(str,":");
        if (strnotempty(str2)==0)
          strcatbuff(str,"8080");
        else
          strcatbuff(str,str2);
      }
      strcatbuff(stropt2,"-P "); strcatbuff(stropt2,str); strcatbuff(stropt2," ");
    }
    
    // Display
    strcatbuff(stropt2," -%v ");

    // Wildcards
    printf("\nYou can define wildcards, like: -*.gif +www.*.com/*.zip -*img_*.zip\n");
    printf("Wildcards (return=none) :");
    linput(stdin,strwild,250);
    
    // Options (typing "help" shows the option list and asks again)
    do {
      printf("\nYou can define additional options, such as recurse level (-r<number>), separed by blank spaces\n");
      printf("To see the option list, type help\n");
      printf("Additional options (return=none) :");
      linput(stdin,str,250);
      if (strfield2(str,"help")) {
        help("httrack",2);
      } else if (strnotempty(str)) {
        strcatbuff(stropt2,str);
        strcatbuff(stropt2," ");
      }
    } while(strfield2(str,"help"));
    
    {
      int argc=1;
      int g=0;                  // non-zero while inside double quotes
      int i=0;
      //
      printf("\n");
      if (strlen(stropt)==1)
        stropt[0]='\0';    // none (only the leading '-')
      sprintf(cmd,"%s %s %s %s",urls,stropt,stropt2,strwild);
      printf("---> Wizard command line: httrack %s\n\n",cmd);
      printf("Ready to launch the mirror? (Y/n) :");
      fflush(stdout);
      linput(stdin,str,250);
      if (strnotempty(str)) {
        if (!((str[0]=='y') || (str[0]=='Y')))
          goto cleanup;         /* declined: still release the buffers */
      }
      printf("\n");

      // split into pieces: cmd is cut in place at each unquoted space
      argv[0]="winhttrack";
      argv[1]=cmd;
      argc++;    
      while(cmd[i])  {
        if(cmd[i]=='\"') g=!g;
        // Stop splitting once argv[] is full; the rest stays in the last
        // argument instead of overflowing the array.
        if(cmd[i]==' ' && !g && argc < argv_max){
          cmd[i]='\0';
          argv[argc++]=cmd+i+1;
        }  
        i++;
      }
      hts_main(argc,argv);
    }
  }

cleanup:
  /* Free buffers (any of them may be NULL after an allocation failure) */
  if (urls != NULL) { freet(urls); }
  if (mainpath != NULL) { freet(mainpath); }
  if (projname != NULL) { freet(projname); }
  if (stropt != NULL) { freet(stropt); }
  if (stropt2 != NULL) { freet(stropt2); }
  if (strwild != NULL) { freet(strwild); }
  if (cmd != NULL) { freet(cmd); }
  if (str != NULL) { freet(str); }
  if (argv != NULL) { freet(argv); }
}
示例#10
0
// the actual worker function, run once the thread/fork routines have been launched
int run_launch_ftp(FTPDownloadStruct * pStruct) {
  lien_back *back = pStruct->pBack;
  httrackp *opt = pStruct->pOpt;
  char user[256] = "anonymous";
  char pass[256] = "user@";
  char line_retr[2048];
  int port = 21;

#if FTP_PASV
  int port_pasv = 0;
#endif
  char BIGSTK adr_ip[1024];
  char *adr, *real_adr;
  char *ftp_filename = "";
  int timeout = 300;            // timeout
  int timeout_onfly = 8;        // attente réponse supplémentaire
  int transfer_list = 0;        // directory
  int rest_understood = 0;      // rest command understood
  t_fullhostent fullhostent_buffer;     // buffer pour resolver

  //
  T_SOC soc_ctl = INVALID_SOCKET;
  T_SOC soc_servdat = INVALID_SOCKET;
  T_SOC soc_dat = INVALID_SOCKET;

  //
  SOCaddr server_data;
  int server_data_size = sizeof(server_data);

  //
  line_retr[0] = adr_ip[0] = '\0';

  timeout = 300;

  // effacer
  strcpybuff(back->r.msg, "");
  back->r.statuscode = 0;
  back->r.size = 0;

  // récupérer user et pass si présents, et sauter user:id@ dans adr
  real_adr = strchr(back->url_adr, ':');
  if (real_adr)
    real_adr++;
  else
    real_adr = back->url_adr;
  while(*real_adr == '/')
    real_adr++;                 // sauter /
  if ((adr = jump_identification(real_adr)) != real_adr) {      // user
    int i = -1;

    pass[0] = '\0';
    do {
      i++;
      user[i] = real_adr[i];
    } while((real_adr[i] != ':') && (real_adr[i]));
    user[i] = '\0';
    if (real_adr[i] == ':') {   // pass
      int j = -1;

      i++;                      // oui on saute aussi le :
      do {
        j++;
        pass[j] = real_adr[i + j];
      } while(((&real_adr[i + j + 1]) < adr) && (real_adr[i + j]));
      pass[j] = '\0';
    }
  }
  // Calculer RETR <nom>
  {
    char *a;

#if 0
    a = back->url_fil + strlen(back->url_fil) - 1;
    while((a > back->url_fil) && (*a != '/'))
      a--;
    if (*a != '/') {
      a = NULL;
    }
#else
    a = back->url_fil;
#endif
    if (a != NULL && *a != '\0') {
#if 0
      a++;                      // sauter /
#endif
      ftp_filename = a;
      if (strnotempty(a)) {
        char catbuff[CATBUFF_SIZE];
        char *ua = unescape_http(catbuff, a);
        int len_a = (int) strlen(ua);

        if (len_a > 0 && ua[len_a - 1] == '/') {        /* obviously a directory listing */
          transfer_list = 1;
          snprintf(line_retr, sizeof(line_retr), "LIST -A %s", ua);
        } else if ((strchr(ua, ' '))
                   || (strchr(ua, '\"'))
                   || (strchr(ua, '\''))
          ) {
          snprintf(line_retr, sizeof(line_retr), "RETR \"%s\"", ua);
        } else {                /* Regular one */
          snprintf(line_retr, sizeof(line_retr), "RETR %s", ua);
        }
      } else {
        transfer_list = 1;
        snprintf(line_retr, sizeof(line_retr), "LIST -A");
      }
    } else {
      strcpybuff(back->r.msg, "Unexpected PORT error");
      // back->status=STATUS_FTP_READY;    // fini
      back->r.statuscode = STATUSCODE_INVALID;
    }
  }

#if FTP_DEBUG
  printf("Connecting to %s...\n", adr);
#endif

  // connexion
  {
    SOCaddr server;
    int server_size = sizeof(server);
    t_hostent *hp;
    char *a;
    char _adr[256];
    const char *error = "unknown error";

    _adr[0] = '\0';
    //T_SOC soc_ctl;
    // effacer structure
    memset(&server, 0, sizeof(server));

    // port
    a = strchr(adr, ':');       // port
    if (a) {
      sscanf(a + 1, "%d", &port);
      strncatbuff(_adr, adr, (int) (a - adr));
    } else
      strcpybuff(_adr, adr);

    // récupérer adresse résolue
    strcpybuff(back->info, "host name");
    hp = hts_gethostbyname2(opt, _adr, &fullhostent_buffer, &error);
    if (hp == NULL) {
      snprintf(back->r.msg, sizeof(back->r.msg),
               "Unable to get server's address: %s", error);
      // back->status=STATUS_FTP_READY;    // fini
      back->r.statuscode = STATUSCODE_NON_FATAL;
      _HALT_FTP return 0;
    }
    _CHECK_HALT_FTP;

    // copie adresse
    SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length);
    // copie adresse pour cnx data
    SOCaddr_copyaddr(server_data, server_data_size, hp->h_addr_list[0],
                     hp->h_length);
    // memcpy(&server.sin_addr, hp->h_addr, hp->h_length);

    // créer ("attachement") une socket (point d'accès) internet,en flot
    soc_ctl = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
    if (soc_ctl == INVALID_SOCKET) {
      strcpybuff(back->r.msg, "Unable to create a socket");
      // back->status=STATUS_FTP_READY;    // fini
      back->r.statuscode = STATUSCODE_INVALID;
      _HALT_FTP return 0;
    }
/*
 * Load the GUI language tables.
 *
 * limit_to == NULL:
 *   the two hash tables (NewLangStrKeys: external key -> internal key,
 *   NewLangStr: internal key -> translated string) are rebuilt from
 *   "lang.def" and from "lang/<name>.txt". The language file is read twice:
 *   first for the selected language, then for LANGUAGE_1 so that strings
 *   still undefined get a value.
 * limit_to != NULL:
 *   nothing is reloaded; the value looked up for "LANGUAGE_<selected+1>"
 *   is copied into limit_to ("???" when the lookup yields NULL).
 *
 * Files are searched first next to the executable, then relative to the
 * current directory. A missing file is reported and terminates the process.
 */
void LANG_LOAD(char* limit_to) {
  CWaitCursor wait;
  //
  extern int NewLangStrSz;
  extern coucal NewLangStr;
  extern int NewLangStrKeysSz;
  extern coucal NewLangStrKeys;
  //
  int selected_lang=LANG_T(-1);
  //
  if (!limit_to) {
    LANG_DELETE();
    NewLangStr=coucal_new(NewLangStrSz);
    NewLangStrKeys=coucal_new(NewLangStrKeysSz);
    if ((NewLangStr==NULL) || (NewLangStrKeys==NULL)) {
      AfxMessageBox("Error in lang.h: not enough memory");
      // Do not go on filling tables that could not be created
      return;
    }
    coucal_value_is_malloc(NewLangStr,1);
    coucal_value_is_malloc(NewLangStrKeys,1);
  }

  // Directory of the running executable, with trailing backslash
  TCHAR ModulePath[MAX_PATH + 1];
  ModulePath[0] = '\0';
  ::GetModuleFileName(NULL, ModulePath, sizeof(ModulePath)/sizeof(TCHAR) - 1);
  TCHAR* pos = _tcsrchr(ModulePath, '\\');
  if (pos != NULL)
  {
    * ( pos + 1) = '\0';
  } else {
    ModulePath[0] = '\0';
  }

  /* Load master file (list of keys and internal keys) */
  CString app = ModulePath;
  if (!limit_to) {
    CString mname=app+"lang.def";
    if (!fexist((char*)LPCTSTR(mname)))
      mname="lang.def";
    FILE* fp=fopen(mname,"rb");
    if (fp) {
      char intkey[8192];
      char key[8192];
      // The file is read as pairs of lines: internal key, then key
      while(!feof(fp)) {
        linput_cpp(fp,intkey,8000);
        linput_cpp(fp,key,8000);
        if (strnotempty(intkey) && strnotempty(key)) {
          char* test=LANGINTKEY(key);

          /* Increment for multiple definitions */
          if (strnotempty(test)) {
            int increment=0;
            size_t pos = strlen(key);
            do {
              increment++;
              sprintf(key+pos,"%d",increment);
              test=LANGINTKEY(key);
            }  while (strnotempty(test));
          }

          if (!strnotempty(test)) {         // avoid duplicates
            size_t len;
            char* buff;
            len=strlen(intkey);
            buff=(char*)malloc(len+2);
            if (buff) {
              strcpybuff(buff,intkey);
              coucal_add(NewLangStrKeys,key,(intptr_t)buff);
            }
          }
        } // if
      }  // while
      fclose(fp);
    } else {
      AfxMessageBox("FATAL ERROR\r\n'lang.def' file NOT FOUND!\r\nEnsure that the installation was complete!");
      exit(0);
    }
  }
  
  /* Language Name? */
  char* hashname;
  {
    char name[256];
    sprintf(name,"LANGUAGE_%d",selected_lang+1);
    hashname=LANGINTKEY(name);
  }

  /* Get only language name */
  if (limit_to) {
    if (hashname)
      strcpybuff(limit_to,hashname);
    else
      strcpybuff(limit_to,"???");
    return;
  }

  /* Error */
  if (!hashname)
    return;

  /* Load specific language file */
  {
    int loops;
    CString err_msg="";
    // 2nd loop: load undefined strings
    for(loops=0;loops<2;loops++) {
      CString lbasename;
      
      {
        char name[256];
        sprintf(name,"LANGUAGE_%d",(loops==0)?(selected_lang+1):1);
        hashname=LANGINTKEY(name);
      }
      lbasename.Format("lang/%s.txt",hashname);
      CString lname=app+lbasename;
      if (!fexist((char*)LPCTSTR(lname)))
        lname=lbasename;
      FILE* fp=fopen(lname,"rb");
      if (fp) {
        char extkey[8192];
        TCHAR value[8192];
        // The file is read as pairs of lines: key, then translated value
        while(!feof(fp)) {
          linput_cpp(fp,extkey,8000);
          linput_cpp(fp,value,8000);

          if (strnotempty(extkey) && strnotempty(value)) {
            int len;
            char* buff;
            char* intkey;
            
            intkey=LANGINTKEY(extkey);
            
            if (strnotempty(intkey)) {
              
              /* Increment for multiple definitions */
              {
                char* test=LANGSEL(intkey);
                if (strnotempty(test)) {
                  if (loops == 0) {
                    int increment=0;
                    size_t pos=strlen(extkey);
                    do {
                      increment++;
                      sprintf(extkey+pos,"%d",increment);
                      intkey=LANGINTKEY(extkey);
                      if (strnotempty(intkey))
                        test=LANGSEL(intkey);
                      else
                        test="";
                    }  while (strnotempty(test));
                  } else
                    intkey="";   // 2nd pass: already defined, keep it
                } else {
                  if (loops > 0) {
                    // String that the selected language did not define
                    err_msg += intkey;
                    err_msg += " ";
                  }
                }
              }
              
              /* Add key */
              if (strnotempty(intkey)) {
                len = (int) strlen(value);
                buff = (char*)malloc(len+2);
                if (buff) {
                  conv_printf(value,buff);
                  coucal_add(NewLangStr,intkey,(intptr_t)buff);
                }
              }
              
            }
          } // if
        }  // while
        fclose(fp);
      } else {
        // Name the file that is actually missing (not lang.def)
        CString fatal_msg;
        fatal_msg.Format("FATAL ERROR\r\n'%s' file NOT FOUND!\r\nEnsure that the installation was complete!",
                         (LPCTSTR) lbasename);
        AfxMessageBox(fatal_msg);
        exit(0);
      }
    }
    if (err_msg.GetLength()>0) {
      // AfxMessageBox("Error: undefined strings follows:\r\n"+err_msg);
    }
  }

  // Set locale (limit_to is always NULL here: that case returned earlier)
  {
    CString charset = LANGUAGE_CHARSET;
    charset.TrimLeft();
    charset.TrimRight();
    charset.MakeLower();
    NewLangCP = CP_THREAD_ACP;
    NewLangFileCP = CP_THREAD_ACP;
#if 0
    if (charset.GetLength() > 0) {
      if (charset.Left(9) == "iso-8859-") {
        int iso = 0;
        int isoCP[] = {0, /* 0 */
          1252, /* ISO-8859-1 */
          1250, /* ISO-8859-2 */
          0, /* ISO-8859-3 */
          0, /* ISO-8859-4 */
          1251, /* ISO-8859-5 */
          1256, /* ISO-8859-6 */
          1253, /* ISO-8859-7 */
          1255, /* ISO-8859-8 */
          1254, /* ISO-8859-9 */
        };
        if (sscanf(charset.GetBuffer(0) + 9, "%d", &iso) == 1) {
          if (iso < sizeof(isoCP)/sizeof(isoCP[0])) {
            if (isoCP[iso] != 0) {
              NewLangFileCP = isoCP[iso];
            }
          }
        }
      } else if (charset.Left(8) == "windows-") {
        int windows = 0;
        if (sscanf(charset.GetBuffer(0) + 8, "%d", &windows) == 1) {
          NewLangFileCP = windows;
        }
      } else if (charset == "shift-jis") {
        NewLangFileCP = 932;
      } else if (charset == "big5") {
        NewLangFileCP = 950;
      } else if (charset == "gb2312") {
        NewLangFileCP = 936;
      } else {
        NewLangFileCP = CP_THREAD_ACP;
      }
    }
    WORD acp = GetACP();
    if (NewLangFileCP != CP_THREAD_ACP && NewLangFileCP != acp) {
      char* currName = LANGUAGE_WINDOWSID;
      LCID thl = GetThreadLocale();
      WORD sid = SORTIDFROMLCID(thl);
      WORD lid = 0;
      WinLangid* lids;
      if (currName[0]) {
        for( lids = (WinLangid*)&WINDOWS_LANGID ; lids->name != NULL ; lids++ ) {
          if (strcmp(currName, lids->name) == 0) {
            lid = lids->langId;
            break;
          }
        }
        if (lid != 0) {
          SetThreadLocale(MAKELCID(lid, sid));
        }
      }
    }
#endif

  }

}
示例#12
0
/* 
   Indexing system
   A little bit dirty, (quick'n dirty, in fact)
   But should be okay on most cases
   Tags and javascript handled (ignored)
*/
/*
 * Extract the keywords of one document and append them to the project's
 * temporary index file (fp_tmpproject).
 *
 * html_data/size: document contents and length
 * mime:           MIME type; only HTML-like types, SVG (and ASF when
 *                 HTS_USEMMS is set), javascript and CSS are processed
 * filename:       name recorded in the index (the indexpath prefix is
 *                 removed when present)
 * indexpath:      directory holding index.txt / sindex.html
 *
 * Text inside tags, scripts and comments is ignored. Each distinct keyword
 * is written once per document, together with a sort key derived from its
 * occurrence count.
 *
 * Returns 0 when a parameter is missing, the MIME type is not handled or a
 * temporary resource cannot be created; 1 otherwise (including when the
 * feature is compiled out).
 */
int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath) {
#if HTS_MAKE_KEYWORD_INDEX
  char catbuff[CATBUFF_SIZE];
  int intag=0,inscript=0,incomment=0;
  char keyword[KEYW_LEN+32];
  LLint i=0;                    /* same type as size: no truncation */
  //
  int WordIndexSize=1024;
  inthash WordIndexHash=NULL;
  FILE *tmpfp=NULL;
  //

  // Check parameters
  if (!html_data)
    return 0;
  if (!size)
    return 0;
  if (!mime)
    return 0;
  if (!filename)
    return 0;

  // First call: remove any previous index files
  if (hts_index_init) {
    remove(concat(catbuff,indexpath,"index.txt"));
    remove(concat(catbuff,indexpath,"sindex.html"));
    hts_index_init=0;
  }

  // Check MIME type
  if (is_html_mime_type(mime)) {
    inscript=0;
  } 
  // FIXME - temporary fix for image/svg+xml (svg)
  // "IN XML" (html like, in fact :) )
  else if (
    (strfield2(mime,"image/svg+xml"))
    ||
    (strfield2(mime,"image/svg-xml"))
#if HTS_USEMMS
    ||
    strfield2(mime,"video/x-ms-asf")
#endif
    ) {
    inscript=0;
  }
  else if (
    (strfield2(mime,"application/x-javascript"))
    || (strfield2(mime,"text/css"))
    ) {
    // Start in "script" state: nothing is indexed until a </script is seen
    inscript=1;
  //} else if (strfield2(mime, "text/vnd.wap.wml")) {   // humm won't work in many cases
  //  inscript=0;
  } else
    return 0;

  // Temporary file (keeps the order in which new keywords were found)
  tmpfp = tmpfile();
  if (!tmpfp)
    return 0;

  // Create hash structure (keyword -> occurrence counter)
  WordIndexHash=inthash_new(WordIndexSize);
  if (!WordIndexHash) {
    fclose(tmpfp);              /* do not leak the temporary file */
    return 0;
  }

  // Start indexing this page
  keyword[0]='\0';
  while(i<size) {
    if (strfield(html_data + i , "<script")) {
      inscript=1;
    } 
    else if (strfield(html_data + i , "<!--")) {
      incomment=1;
    }
    else if (strfield(html_data + i , "</script")) {
      if (!incomment)
        inscript=0;
    } 
    else if (strfield(html_data + i , "-->")) {
      incomment=0;
    }
    else if (html_data[i]=='<') {
      if (!inscript)
        intag=1;
    }    
    else if (html_data[i]=='>') {
      intag=0;
    }    
    else {    
      // Okay, parse keywords
      if ( (!inscript) && (!incomment) && (!intag) ) {
        char cchar=html_data[i];
        int pos;
        int len = (int) strlen(keyword);
        
        // Replace (ignore case, and so on..)
        if ((pos=strcpos(KEYW_TRANSCODE_FROM,cchar))>=0)
          cchar=KEYW_TRANSCODE_TO[pos];
        
        if (strchr(KEYW_ACCEPT,cchar)) {
          /* Ignore some characters at beginning */
          if ((len>0) || (!strchr(KEYW_IGNORE_BEG,cchar))) {
            keyword[len++]=cchar;
            keyword[len]='\0';
          }
        } else if ( (strchr(KEYW_SPACE,cchar)) || (!cchar) ) {
          /* Separator: the current keyword is complete */

          /* Avoid these words */
          if (len>0) {
            if (strchr(KEYW_NOT_BEG,keyword[0])) {
              keyword[(len=0)]='\0';
            }
          }

          /* Strip ending . and so */
          {
            int ok=0;
            while((len = (int) strlen(keyword)) && (!ok)) {
              if (strchr(KEYW_STRIP_END,keyword[len-1])) {      /* strip it */
                keyword[len-1]='\0';
              } else
                ok=1;
            }
          }
          
          /* Store it ? */
          if (len >= KEYW_MIN_LEN ) {
            hts_primindex_words++;
            if (inthash_inc(WordIndexHash,keyword)) {   /* added new */
              fprintf(tmpfp,"%s\n",keyword);
            }
          }
          keyword[(len=0)]='\0';
        } else      /* Invalid */
          keyword[(len=0)]='\0';

        /* Over-long keyword: drop it */
        if (len>KEYW_LEN) {
          keyword[(len=0)]='\0';
        }
      }
      
    }
    
    i++;
  }

  // Reset temp file
  fseek(tmpfp,0,SEEK_SET);

  // Process indexing for this page
  {
    if (fp_tmpproject) {
      while(!feof(tmpfp)) {
        char line[KEYW_LEN + 32];
        linput(tmpfp,line,KEYW_LEN + 2);
        if (strnotempty(line)) {
          intptr_t e=0;
          if (inthash_read(WordIndexHash,line,&e)) {
            char BIGSTK savelst[HTS_URLMAXSIZE*2];
            e++;          /* 0 means "once" */
            
            // Strip the index path prefix from the file name when present
            if (strncmp((const char*)fslash(catbuff,(char*)indexpath),filename,strlen(indexpath))==0)
              strcpybuff(savelst,filename+strlen(indexpath));
            else
              strcpybuff(savelst,filename);
            
            // Add entry for this file and word
            fprintf(fp_tmpproject,"%s %d %s\n",line,(int) (KEYW_SORT_MAXCOUNT - e),savelst);
            hts_primindex_size++;
          }
        }
      }
    }
  }

  // Delete temp file
  fclose(tmpfp);
  tmpfp=NULL;

  // Clear hash table
  inthash_delete(&WordIndexHash);
#endif
  return 1;
}
示例#13
0
文件: htsweb.c 项目: eatonmi/Crawler
/*
 * Per-loop status callback: publishes the current mirror statistics and the
 * state of the active transfer slots as "info.*" keys of the embedded small
 * server.
 *
 * carg        callback argument (not used by this function)
 * opt         engine options, passed to hts_is_parsing() / hts_is_testing()
 * back        array of transfer slots
 * back_max    number of entries in back[]
 * back_index  slot the scan starts from; a negative value disables the scan
 * lien_n      number of links scanned so far (ignored when negative)
 * lien_tot    total number of links (ignored when negative)
 * stat_time   elapsed time, also used as divisor for the average rate
 * stats       global counters; may be NULL, in which case the previously
 *             stored values are kept
 *
 * Returns 0 when commandEndRequested == 2 (checked before the lock is
 * taken), 1 otherwise.
 *
 * The per-slot details and the "current job" text are refreshed only when
 * more than 100 TStamp units have elapsed since the previous refresh
 * (presumably milliseconds -- depends on mtime_local(), TODO confirm) or
 * when the clock went backwards.
 */
int __cdecl htsshow_loop(t_hts_callbackarg *carg, httrackp *opt, lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) {
  static TStamp prev_mytime=0; /* ok */
  static t_InpInfo SInfo; /* ok */
  //
  TStamp mytime;
  long int rate=0;
  // -1 means "no value available": such fields leave SInfo untouched below
  int stat_written=-1;
  int stat_updated=-1;
  int stat_errors=-1;
  int stat_warnings=-1;
  int stat_infos=-1;
  int nbk=-1;
  int stat_nsocket=-1;
  LLint stat_bytes=-1;
  LLint stat_bytes_recv=-1;
  int irate=-1;
  //
  char st[256];

  /* Exit now */
  if (commandEndRequested == 2)
    return 0;

  /* Lock */
  webhttrack_lock();

  if (stats) {
    stat_written=stats->stat_files;
    stat_updated=stats->stat_updated_files;
    stat_errors=stats->stat_errors;
    stat_warnings=stats->stat_warnings;
    stat_infos=stats->stat_infos;
    nbk=stats->nbk;
    stat_nsocket=stats->stat_nsocket;
    irate=(int)stats->rate;
    stat_bytes=stats->nb;
    stat_bytes_recv=stats->HTS_TOTAL_RECV;
  }
  
  mytime=mtime_local();
  // average rate = received bytes / elapsed time, when both are known
  if ((stat_time>0) && (stat_bytes_recv>0))
    rate=(int)(stat_bytes_recv/stat_time);
  else
    rate=0;    // no information
  
  /* Infos: only overwrite the persistent values with valid samples */
  if (stat_bytes>=0) SInfo.stat_bytes=stat_bytes;      // bytes
  if (stat_time>=0) SInfo.stat_time=stat_time;         // time
  if (lien_tot>=0) SInfo.lien_tot=lien_tot; // number of links
  if (lien_n>=0) SInfo.lien_n=lien_n;       // scanned
  SInfo.stat_nsocket=stat_nsocket;          // socks
  if (rate>0)  SInfo.rate=rate;                // rate
  if (irate>=0) SInfo.irate=irate;             // irate
  if (SInfo.irate<0) SInfo.irate=SInfo.rate;
  if (nbk>=0) SInfo.stat_back=nbk;
  if (stat_written>=0) SInfo.stat_written=stat_written;
  if (stat_updated>=0) SInfo.stat_updated=stat_updated;
  if (stat_errors>=0)  SInfo.stat_errors=stat_errors;
  if (stat_warnings>=0)  SInfo.stat_warnings=stat_warnings;
  if (stat_infos>=0)  SInfo.stat_infos=stat_infos;
  
  
  st[0]='\0';
  qsec2str(st,stat_time);
  
  /* Set keys */
  smallserver_setkeyint("info.stat_bytes", SInfo.stat_bytes);
  smallserver_setkeyint("info.stat_time", SInfo.stat_time);
  smallserver_setkeyint("info.lien_tot", SInfo.lien_tot);
  smallserver_setkeyint("info.lien_n", SInfo.lien_n);
  smallserver_setkeyint("info.stat_nsocket", SInfo.stat_nsocket);
  smallserver_setkeyint("info.rate", SInfo.rate);
  smallserver_setkeyint("info.irate", SInfo.irate);
  smallserver_setkeyint("info.stat_back", SInfo.stat_back);
  smallserver_setkeyint("info.stat_written", SInfo.stat_written);
  smallserver_setkeyint("info.stat_updated", SInfo.stat_updated);
  smallserver_setkeyint("info.stat_errors", SInfo.stat_errors);
  smallserver_setkeyint("info.stat_warnings", SInfo.stat_warnings);
  smallserver_setkeyint("info.stat_infos", SInfo.stat_infos);
  /* */
  smallserver_setkey("info.stat_time_str", st);
  
  // throttle the detailed refresh (also refresh if the clock went backwards)
  if ( ((mytime - prev_mytime)>100) || ((mytime - prev_mytime)<0) ) {
    prev_mytime=mytime;
    
    
    // walk through the transfer slots
    if (back_index>=0 && back_max > 0) {  // only if an index was given
      int j,k;
      int index=0;      // next free entry in StatsBuffer
      int ok=0;         // set when the current slot matches the current pass
      int l;            // length of the displayed URL
      //
      t_StatsBuffer StatsBuffer[NStatsBuffer];
      
      {
        int i;
        for(i=0;i<NStatsBuffer;i++) {
          strcpybuff(StatsBuffer[i].state,"");
          strcpybuff(StatsBuffer[i].name,"");
          strcpybuff(StatsBuffer[i].file,"");
          strcpybuff(StatsBuffer[i].url_sav,"");
          // url_adr/url_fil are published below, so they must never be left
          // uninitialized (they previously were)
          strcpybuff(StatsBuffer[i].url_adr,"");
          strcpybuff(StatsBuffer[i].url_fil,"");
          StatsBuffer[i].back=0;
          StatsBuffer[i].size=0;
          StatsBuffer[i].sizetot=0;
        }
      }
      for(k=0;k<2;k++) {    // 0: current link 1: other links
        for(j=0;(j<3) && (index<NStatsBuffer);j++) {  // priority pass
          int _i;
          // k==0 visits only offset 0 (the current slot);
          // k==1 visits offsets 1..back_max-1
          for(_i=0+k;(_i< max(back_max*k,1) ) && (index<NStatsBuffer);_i++) {  // slot offset
            int i=(back_index+_i)%back_max;    // start with the "first" (current) one
            if (back[i].status>=0) {     // means "active link"
              ok=0;
              switch(j) {
              case 0:     // highest priority: transfers in progress
                if ((back[i].status>0) && (back[i].status<99)) {
                  strcpybuff(StatsBuffer[index].state,"receive"); ok=1;
                }
                break;
              case 1:     // pending connections / requests
                if (back[i].status==STATUS_WAIT_HEADERS) {
                  strcpybuff(StatsBuffer[index].state,"request"); ok=1;
                }
                else if (back[i].status==STATUS_CONNECTING) {
                  strcpybuff(StatsBuffer[index].state,"connect"); ok=1;
                }
                else if (back[i].status==STATUS_WAIT_DNS) {
                  strcpybuff(StatsBuffer[index].state,"search"); ok=1;
                }
                else if (back[i].status==STATUS_FTP_TRANSFER) {
                  // Default label is "ftp"; it is replaced by the URL scheme
                  // when url_adr starts with one of at most 3 characters
                  // (proto[] holds exactly 3 characters plus the terminator).
                  char proto[] = "ftp";
                  if (back[i].url_adr[0]) {
                    char* ep = strchr(back[i].url_adr, ':');
                    char* eps = strchr(back[i].url_adr, '/');
                    int count;
                    if (ep != NULL && ep < eps && (count = (int) (ep - back[i].url_adr) ) < 4) {
                      proto[0] = '\0';
                      strncat(proto, back[i].url_adr, count);
                    }
                  }
                  sprintf(StatsBuffer[index].state,"%s: %s",proto,back[i].info); ok=1;
                }
                break;
              default:    // lowest priority: finished slots
                if (back[i].status==STATUS_READY) {  // ready
                  if ((back[i].r.statuscode==HTTP_OK)) {
                    strcpybuff(StatsBuffer[index].state,"ready"); ok=1;
                  }
                  else if ((back[i].r.statuscode>=100) && (back[i].r.statuscode<=599)) {
                    char tempo[256]; tempo[0]='\0';
                    infostatuscode(tempo,back[i].r.statuscode);
                    strcpybuff(StatsBuffer[index].state,tempo); ok=1;
                  }
                  else {
                    strcpybuff(StatsBuffer[index].state,"error"); ok=1;
                  }
                }
                break;
              }
              
              if (ok) {
                char s[HTS_URLMAXSIZE*2];
                //
                StatsBuffer[index].back=i;        // slot index, for more details
                //
                s[0]='\0';
                strcpybuff(StatsBuffer[index].url_sav,back[i].url_sav);   // for cancel
                // Keep the raw address and path too, since both are exported
                // as info.url_adr[] / info.url_fil[].
                // NOTE(review): assumes these fields are sized like url_sav.
                strcpybuff(StatsBuffer[index].url_adr,back[i].url_adr);
                strcpybuff(StatsBuffer[index].url_fil,back[i].url_fil);
                // build "<address>/<path>" for display
                if (strcmp(back[i].url_adr,"file://"))
                  strcatbuff(s,back[i].url_adr);
                else
                  strcatbuff(s,"localhost");
                if (back[i].url_fil[0]!='/')
                  strcatbuff(s,"/");
                strcatbuff(s,back[i].url_fil);
                
                // split at the last '/': the tail (at most 200 chars) goes to
                // .file, the head stays in s and becomes .name
                StatsBuffer[index].file[0]='\0';
                {
                  char* a=strrchr(s,'/');
                  if (a) {
                    strncatbuff(StatsBuffer[index].file,a,200);
                    *a='\0';
                  }
                }
                
                if ((l = (int) strlen(s))<MAX_LEN_INPROGRESS)
                  strcpybuff(StatsBuffer[index].name,s);
                else {
                  // too long: keep the beginning and the end, joined by "..."
                  StatsBuffer[index].name[0]='\0';
                  strncatbuff(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2);
                  strcatbuff(StatsBuffer[index].name,"...");
                  strcatbuff(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2);
                }
                
                if (back[i].r.totalsize>0) {  // total size known
                  StatsBuffer[index].sizetot=back[i].r.totalsize;
                  StatsBuffer[index].size=back[i].r.size;
                } else {  // total size unknown
                  if (back[i].status==STATUS_READY) {  // ready
                    StatsBuffer[index].sizetot=back[i].r.size;
                    StatsBuffer[index].size=back[i].r.size;
                  } else {
                    // no total: report progress modulo an 8192-byte window
                    StatsBuffer[index].sizetot=8192;
                    StatsBuffer[index].size=(back[i].r.size % 8192);
                  }
                }
                index++;
              }
            }
          }
        }
      }

      /* Display current job */
      {
        int parsing=0;
        if (commandEndRequested)
          smallserver_setkey("info.currentjob", "finishing pending transfers - Select [Cancel] to stop now!");
        else if (!(parsing=hts_is_parsing(opt, -1)))
          smallserver_setkey("info.currentjob", "receiving files");
        else {
          char tmp[1024];
          tmp[0] = '\0';    // stays empty for any other hts_is_testing() value
          switch(hts_is_testing(opt)) {
          case 0:
            sprintf(tmp, "parsing HTML file (%d%%)",parsing);
            break;
          case 1:
            sprintf(tmp, "parsing HTML file: testing links (%d%%)",parsing);
            break;
          case 2:
            sprintf(tmp, "purging files");
            break;
          case 3:
            sprintf(tmp, "loading cache");
            break;
          case 4:
            sprintf(tmp, "waiting (scheduler)");
            break;
          case 5:
            sprintf(tmp, "waiting (throttle)");
            break;
          }
          smallserver_setkey("info.currentjob", tmp);
        }
      }

      /* Display background jobs */
      {
        int i;
        for(i=0;i<NStatsBuffer;i++) {
          if (strnotempty(StatsBuffer[i].state)) {    // only filled entries
            strc_int2bytes2 strc;
            smallserver_setkeyarr("info.state[", i, "]", StatsBuffer[i].state);
            smallserver_setkeyarr("info.name[", i, "]", StatsBuffer[i].name);
            smallserver_setkeyarr("info.file[", i, "]", StatsBuffer[i].file);
            smallserver_setkeyarr("info.size[", i, "]", int2bytes(&strc,StatsBuffer[i].size));
            smallserver_setkeyarr("info.sizetot[", i, "]", int2bytes(&strc,StatsBuffer[i].sizetot));
            smallserver_setkeyarr("info.url_adr[", i, "]", StatsBuffer[i].url_adr);
            smallserver_setkeyarr("info.url_fil[", i, "]", StatsBuffer[i].url_fil);
            smallserver_setkeyarr("info.url_sav[", i, "]", StatsBuffer[i].url_sav);
          }
        }
      }


    }   
      
  }
  
  /* UnLock */
  webhttrack_release();
  
  return 1;
}
示例#14
0
static int hts_acceptlink_(httrackp* opt,
													int ptr,int lien_tot,lien_url** liens,
													char* adr,char* fil,
													char* tag, char* attribute,
													int* set_prio_to,
													int* just_test_it) 
{
  int forbidden_url=-1;
  int meme_adresse;
	int embedded_triggered = 0;
#define _FILTERS     (*opt->filters.filters)
#define _FILTERS_PTR (opt->filters.filptr)
#define _ROBOTS      ((robots_wizard*)opt->robotsptr)
  int may_set_prio_to=0;

  // -------------------- PHASE 0 --------------------

  /* Infos */
  if ((opt->debug>1) && (opt->log!=NULL)) {
    HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"wizard test begins: %s%s"LF,adr,fil);
    test_flush;
  }
  
  /* Already exists? Then, we know that we knew that this link had to be known */
  if (adr[0] != '\0'
    && fil[0] != '\0'
    && opt->hash != NULL
    && hash_read(opt->hash, adr, fil, 1, opt->urlhack) >= 0
    ) {
    return 0;  /* Yokai */
  }
  
  // -------------------- PRELUDE OF PHASE 3-BIS --------------------

	/* Built-in known tags (<img src=..>, ..) */
	if (forbidden_url != 0 && opt->nearlink && tag != NULL && attribute != NULL) {
		int i;
		for(i = 0 ; hts_detect_embed[i].tag != NULL ; i++) {
			if (cmp_token(tag, hts_detect_embed[i].tag)
				&& cmp_token(attribute, hts_detect_embed[i].attr)
				) 
			{
				embedded_triggered = 1;
				break;
			}
		}
	}


  // -------------------- PHASE 1 --------------------

  /* Doit-on traiter les non html? */
  if ((opt->getmode & 2)==0) {    // non on ne doit pas
    if (!ishtml(opt,fil)) {  // non il ne faut pas
      //adr[0]='\0';    // ne pas traiter ce lien, pas traiter
      forbidden_url=1;    // interdire récupération du lien
      if ((opt->debug>1) && (opt->log!=NULL)) {
        HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"non-html file ignored at %s : %s"LF,adr,fil);
        test_flush;
      }
      
    }
  }
  
  /* Niveau 1: ne pas parser suivant! */
  if (ptr>0) {
    if ( ( liens[ptr]->depth <= 0 ) || ( liens[ptr]->depth <= 1 && !embedded_triggered ) ) {
      forbidden_url=1;    // interdire récupération du lien
      if ((opt->debug>1) && (opt->log!=NULL)) {
        HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"file from too far level ignored at %s : %s"LF,adr,fil);
        test_flush;
      }
    }
  }

  /* en cas d'échec en phase 1, retour immédiat! */
  if (forbidden_url == 1) {
    return forbidden_url;
  }
  
  // -------------------- PHASE 2 --------------------

  // ------------------------------------------------------
  // doit-on traiter ce lien?.. vérifier droits de déplacement
  meme_adresse=strfield2(adr,urladr);
  if ((opt->debug>1) && (opt->log!=NULL)) {
    HTS_LOG(opt,LOG_DEBUG); 
    if (meme_adresse) 
      fprintf(opt->log,"Compare addresses: %s=%s"LF,adr,urladr);
    else
      fprintf(opt->log,"Compare addresses: %s!=%s"LF,adr,urladr);
    test_flush;
  }
  if (meme_adresse) {  // même adresse 
    {  // tester interdiction de descendre
      // MODIFIE : en cas de remontée puis de redescente, il se pouvait qu'on ne puisse pas atteindre certains fichiers
      // problème: si un fichier est virtuellement accessible via une page mais dont le lien est sur une autre *uniquement*..
      char BIGSTK tempo[HTS_URLMAXSIZE*2];
      char BIGSTK tempo2[HTS_URLMAXSIZE*2];
      tempo[0] = tempo2[0] = '\0';
      
      // note (up/down): on calcule à partir du lien primaire, ET du lien précédent.
      // ex: si on descend 2 fois on peut remonter 1 fois
      
      if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) {
        if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) {
          if ((opt->debug>1) && (opt->log!=NULL)) {
            HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"build relative links to test: %s %s (with %s and %s)"LF,tempo,tempo2,liens[liens[ptr]->premier]->fil,liens[ptr]->fil);
            test_flush;
          }
          
          // si vient de primary, ne pas tester lienrelatif avec (car host "différent")
          /*if (liens[liens[ptr]->premier] == 0) {   // vient de primary
          }
          */
          
          // NEW: finalement OK, sauf pour les moved repérés par link_import
          // PROBLEME : annulé a cause d'un lien éventuel isolé accepté..qui entrainerait un miroir
          
          // (test même niveau (NOUVEAU à cause de certains problèmes de filtres non intégrés))
          // NEW
          if ( 
            (tempo[0]  != '\0' && tempo[1]  != '\0' && strchr(tempo+1,'/') == 0)
            ||
            (tempo2[0] != '\0' && tempo2[1] != '\0' && strchr(tempo2+1,'/') == 0) 
            ) {
            if (!liens[ptr]->link_import) {   // ne résulte pas d'un 'moved'
              forbidden_url=0;
              if ((opt->debug>1) && (opt->log!=NULL)) {
                HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil);
                test_flush;
             }
            }
          }
          
          // down
          if ( (strncmp(tempo,"../",3)) || (strncmp(tempo2,"../",3)))  {   // pas montée sinon ne nbous concerne pas
            int test1,test2;
            if (!strncmp(tempo,"../",3))
              test1=0;
            else
              test1 = (strchr(tempo +((*tempo =='/')?1:0),'/')!=NULL);
            if (!strncmp(tempo2,"../",3))
              test2=0;
            else
              test2 = (strchr(tempo2+((*tempo2=='/')?1:0),'/')!=NULL);
            if ( (test1) && (test2) ) {   // on ne peut que descendre
              if ((opt->seeker & 1)==0) {  // interdiction de descendre
                forbidden_url=1;
                if ((opt->debug>1) && (opt->log!=NULL)) {
                  HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"lower link canceled: %s%s"LF,adr,fil);
                  test_flush;
                }
              } else {    // autorisé à priori - NEW
                if (!liens[ptr]->link_import) {   // ne résulte pas d'un 'moved'
                  forbidden_url=0;
                  if ((opt->debug>1) && (opt->log!=NULL)) {
                    HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil);
                    test_flush;
                  }
                }
              }
            } else if ( (test1) || (test2) ) {   // on peut descendre pour accéder au lien
              if ((opt->seeker & 1)!=0) {  // on peut descendre - NEW
                if (!liens[ptr]->link_import) {   // ne résulte pas d'un 'moved'
                  forbidden_url=0;
                  if ((opt->debug>1) && (opt->log!=NULL)) {
                    HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil);
                    test_flush;
                  }
                }
              }
            }
          }
          
          
          // up
          if ( (!strncmp(tempo,"../",3)) && (!strncmp(tempo2,"../",3)) ) {    // impossible sans monter
            if ((opt->seeker & 2)==0) {  // interdiction de monter
              forbidden_url=1;
              if ((opt->debug>1) && (opt->log!=NULL)) {
                HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"upper link canceled: %s%s"LF,adr,fil);
                test_flush;
              }
            } else {       // autorisé à monter - NEW
              if (!liens[ptr]->link_import) {   // ne résulte pas d'un 'moved'
                forbidden_url=0;
                if ((opt->debug>1) && (opt->log!=NULL)) {
                  HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil);
                  test_flush;
                }
              }
            }
          } else if ( (!strncmp(tempo,"../",3)) || (!strncmp(tempo2,"../",3)) ) {    // Possible en montant
            if ((opt->seeker & 2)!=0) {  // autorisé à monter - NEW
              if (!liens[ptr]->link_import) {   // ne résulte pas d'un 'moved'
                forbidden_url=0;
                if ((opt->debug>1) && (opt->log!=NULL)) {
                  HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil);
                  test_flush;
                }
              }
            }  // sinon autorisé en descente
          }
          
          
        } else {
          if (opt->log) {
            fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil);
            test_flush;
          }
        }
      } else {
        if (opt->log) {
          fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil);
          test_flush;
        }
      }
      
    }  // tester interdiction de descendre?
    
    {  // tester interdiction de monter
      char BIGSTK tempo[HTS_URLMAXSIZE*2];
      char BIGSTK tempo2[HTS_URLMAXSIZE*2];
      if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) {
        if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) {
        } else {
          if (opt->log) { 
            fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil);
            test_flush;
          }
          
        }
      } else {
        if (opt->log) { 
          fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil);
          test_flush;
        }
        
      }
    }   // fin tester interdiction de monter
    
  } else {    // adresse différente, sortir?
    
    //if (!opt->wizard) {    // mode non wizard
    // doit-on traiter ce lien?.. vérifier droits de sortie
    switch((opt->travel & 255)) {
    case 0: 
      if (!opt->wizard)    // mode non wizard
        forbidden_url=1; break;    // interdicton de sortir au dela de l'adresse
    case 1: {              // sortie sur le même dom.xxx
      size_t i = strlen(adr)-1;
      size_t j = strlen(urladr)-1;
      while( (i>0) && (adr[i]!='.')) i--;
      while( (j>0) && (urladr[j]!='.')) j--;
      i--; j--;
      while( (i>0) && (adr[i]!='.')) i--;
      while( (j>0) && (urladr[j]!='.')) j--;
      if ((i>0) && (j>0)) {
        if (!strfield2(adr+i,urladr+j)) {   // !=
          if (!opt->wizard) {   // mode non wizard
            //printf("refused: %s\n",adr);
            forbidden_url=1;  // pas même domaine  
            if ((opt->debug>1) && (opt->log!=NULL)) {
              HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"foreign domain link canceled: %s%s"LF,adr,fil);
              test_flush;
            }
          }
          
        } else {
          if (opt->wizard) {   // mode wizard
            forbidden_url=0;  // même domaine  
            if ((opt->debug>1) && (opt->log!=NULL)) {
              HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"same domain link authorized: %s%s"LF,adr,fil);
              test_flush;
            }
          }
        }
        
      } else
        forbidden_url=1;
            } 
      break;  
    case 2: {                      // sortie sur le même .xxx
      size_t i = strlen(adr)-1;
      size_t j = strlen(urladr)-1;
      while( (i>0) && (adr[i]!='.')) i--;
      while( (j>0) && (urladr[j]!='.')) j--;
      if ((i>0) && (j>0)) {
        if (!strfield2(adr+i,urladr+j)) {   // !-
          if (!opt->wizard) {   // mode non wizard
            //printf("refused: %s\n",adr);
            forbidden_url=1;  // pas même .xx  
            if ((opt->debug>1) && (opt->log!=NULL)) {
              HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"foreign location link canceled: %s%s"LF,adr,fil);
              test_flush;
            }
          }
        } else {
          if (opt->wizard) {   // mode wizard
            forbidden_url=0;  // même domaine  
            if ((opt->debug>1) && (opt->log!=NULL)) {
              HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"same location link authorized: %s%s"LF,adr,fil);
              test_flush;
            }
          }
        }
      } else forbidden_url=1;     
            } 
      break;
    case 7:                 // everywhere!!
      if (opt->wizard) {   // mode wizard
        forbidden_url=0;
        break;
      }
    }  // switch
    
    // ANCIENNE POS -- récupérer les liens à côtés d'un lien (nearlink)
    
  }  // fin test adresse identique/différente

  // -------------------- PHASE 3 --------------------

  // récupérer les liens à côtés d'un lien (nearlink) (nvelle pos)
  if (forbidden_url != 0 && opt->nearlink) {
    if (!ishtml(opt,fil)) {  // non html
      //printf("ok %s%s\n",ad,fil);
      forbidden_url=0;    // autoriser
      may_set_prio_to=1+1; // set prio to 1 (parse but skip urls) if near is the winner
      if ((opt->debug>1) && (opt->log!=NULL)) {
        HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"near link authorized: %s%s"LF,adr,fil);
        test_flush;
      }
    }
  }

  // -------------------- PHASE 3-BIS --------------------

	/* Built-in known tags (<img src=..>, ..) */
	if (forbidden_url != 0 && embedded_triggered) {
		forbidden_url=0;    // autoriser
		may_set_prio_to=1+1; // set prio to 1 (parse but skip urls) if near is the winner
		if ((opt->debug>1) && (opt->log!=NULL)) {
			HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"near link authorized (friendly tag): %s%s"LF,adr,fil);
			test_flush;
		}
	}


  // -------------------- PHASE 4 --------------------
  
  // ------------------------------------------------------
  // Si wizard, il se peut qu'on autorise ou qu'on interdise 
  // un lien spécial avant même de tester sa position, sa hiérarchie etc.
  // peut court-circuiter le forbidden_url précédent
  if (opt->wizard) { // le wizard entre en action..
    //
    int question=1;         // poser une question                            
    int force_mirror=0;     // pour mirror links
    int filters_answer=0;   // décision prise par les filtres
    char BIGSTK l[HTS_URLMAXSIZE*2];
    char BIGSTK lfull[HTS_URLMAXSIZE*2];
    
    if (forbidden_url!=-1) question=0;  // pas de question, résolu
    
    // former URL complète du lien actuel
    strcpybuff(l,jump_identification(adr));
    if (*fil!='/') strcatbuff(l,"/");
    strcatbuff(l,fil);
    // full version (http://foo:[email protected]/bar.html)
    if (!link_has_authority(adr))
      strcpybuff(lfull,"http://");
    else
      lfull[0]='\0';
    strcatbuff(lfull,adr);
    if (*fil!='/') strcatbuff(lfull,"/");
    strcatbuff(lfull,fil);
    
    // tester filters (URLs autorisées ou interdites explicitement)
    
    // si lien primaire on saute le joker, on est pas lémur
    if (ptr==0) {  // lien primaire, autoriser
      question=1;    // la question sera résolue automatiquement
      forbidden_url=0;
      may_set_prio_to=0;    // clear may-set flag
    } else {
      // eternal depth first
      // vérifier récursivité extérieure
      if (opt->extdepth>0) {
        if ( /*question && */ (ptr>0) && (!force_mirror)) {
          // well, this is kinda a hak
          // we don't want to mirror EVERYTHING, and we have to decide where to stop
          // there is no way yet to tag "external" links, and therefore links that are
          // "weak" (authorized depth < external depth) are just not considered for external
          // hack
          if (liens[ptr]->depth > opt->extdepth) {
            // *set_prio_to = opt->extdepth + 1;
            *set_prio_to = 1 + (opt->extdepth);
            may_set_prio_to=0;  // clear may-set flag
            forbidden_url=0;    // autorisé
            question=0;         // résolution auto
            if ((opt->debug>1) && (opt->log!=NULL)) {
              if (question) {
                HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil);
              } else {
                HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) forced to accept link (external depth): link %s at %s%s"LF,l,urladr,urlfil);
              }
              test_flush;
            }
            
          }
        }
      }  
      
      // filters
      {
        int jok;
        char* mdepth="";
        // filters, 0=sait pas 1=ok -1=interdit
        {
          int jokDepth1=0,jokDepth2=0;
          int jok1=0,jok2=0;
          jok1  = fa_strjoker(/*url*/0, _FILTERS,*_FILTERS_PTR,lfull,NULL,NULL,&jokDepth1);
          jok2 =  fa_strjoker(/*url*/0, _FILTERS,*_FILTERS_PTR,l,    NULL,NULL,&jokDepth2);
          if (jok2 == 0) {      // #2 doesn't know
            jok = jok1;        // then, use #1
            mdepth = _FILTERS[jokDepth1];
          } else if (jok1 == 0) { // #1 doesn't know
            jok = jok2;        // then, use #2
            mdepth = _FILTERS[jokDepth2];
          } else if (jokDepth1 >= jokDepth2) { // #1 matching rule is "after" #2, then it is prioritary
            jok = jok1;
            mdepth = _FILTERS[jokDepth1];
          } else {                             // #2 matching rule is "after" #1, then it is prioritary
            jok = jok2;
            mdepth = _FILTERS[jokDepth2];
          }
        }
        
        if (jok == 1) {   // autorisé
          filters_answer=1;  // décision prise par les filtres
          question=0;    // ne pas poser de question, autorisé
          forbidden_url=0;  // URL autorisée
          may_set_prio_to=0;    // clear may-set flag
          if ((opt->debug>1) && (opt->log!=NULL)) {
            HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit authorized (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil);
            test_flush;
          }
        } else if (jok == -1) {  // forbidden
          filters_answer=1;  // décision prise par les filtres
          question=0;    // ne pas poser de question:
          forbidden_url=1;   // URL interdite
          if ((opt->debug>1) && (opt->log!=NULL)) {
            HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit forbidden (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil);
            test_flush;
          }
        }  // sinon on touche à rien
      }
    }
    
    // vérifier mode mirror links
    if (question) {
      if (opt->mirror_first_page) {    // mode mirror links
        if (liens[ptr]->precedent==0) {  // parent=primary!
          forbidden_url=0;    // autorisé
          may_set_prio_to=0;    // clear may-set flag
          question=1;         // résolution auto
          force_mirror=5;     // mirror (5)
          if ((opt->debug>1) && (opt->log!=NULL)) {
            HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit mirror link: link %s at %s%s"LF,l,urladr,urlfil);
            test_flush;
          }
        }
      }
    }
    
    // on doit poser la question.. peut on la poser?
    // (oui je sais quel preuve de délicatesse, merci merci)      
    if ((question) && (ptr>0) && (!force_mirror)) {
      if (opt->wizard==2) {    // éliminer tous les liens non répertoriés comme autorisés (ou inconnus)
        question=0;
        forbidden_url=1;
        if ((opt->debug>1) && (opt->log!=NULL)) {
          HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) ambiguous forbidden link: link %s at %s%s"LF,l,urladr,urlfil);
          test_flush;
        }
      }
    }
    
    // vérifier robots.txt
    if (opt->robots) {
      int r = checkrobots(_ROBOTS,adr,fil);
      if (r == -1) {    // interdiction
#if DEBUG_ROBOTS
        printf("robots.txt forbidden: %s%s\n",adr,fil);
#endif
        // question résolue, par les filtres, et mode robot non strict
        if ((!question) && (filters_answer) && (opt->robots == 1) && (forbidden_url!=1)) {
          r=0;    // annuler interdiction des robots
          if (!forbidden_url) {
            if ((opt->debug>1) && (opt->log!=NULL)) {
              HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Warning link followed against robots.txt: link %s at %s%s"LF,l,adr,fil);
              test_flush;
            }
          }
        }
        if (r == -1) {    // interdire
          forbidden_url=1;
          question=0;
          if ((opt->debug>1) && (opt->log!=NULL)) {
            HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(robots.txt) forbidden link: link %s at %s%s"LF,l,adr,fil);
            test_flush;
          }
        }
      }
    }
    
    if (!question) {
      if ((opt->debug>1) && (opt->log!=NULL)) {
        if (!forbidden_url) {
          HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) shared foreign domain link: link %s at %s%s"LF,l,urladr,urlfil);
        } else {
          HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) cancelled foreign domain link: link %s at %s%s"LF,l,urladr,urlfil);
        }
        test_flush;
      }
#if BDEBUG==3
      printf("at %s in %s, wizard says: url %s ",urladr,urlfil,l);
      if (forbidden_url) printf("cancelled"); else printf(">SHARED<");
      printf("\n");
#endif 
    }

    /* en cas de question, ou lien primaire (enregistrer autorisations) */
    if (question || (ptr==0)) {
      const char* s;
      int n=0;
      
      // si primaire (plus bas) alors ...
      if ((ptr!=0) && (force_mirror==0)) {
        char BIGSTK tempo[HTS_URLMAXSIZE*2];
        tempo[0]='\0';
        strcatbuff(tempo,adr);
        strcatbuff(tempo,fil);
        s = RUN_CALLBACK1(opt, query3, tempo);
        if (strnotempty(s)==0)  // entrée
            n=0;
          else if (isdigit((unsigned char)*s))
            sscanf(s,"%d",&n);
          else {
            switch(*s) {
            case '*': n=-1; break;
            case '!': n=-999; {
              /*char *a;
              int i;                                    
              a=copie_de_adr-128;
              if (a<r.adr) a=r.adr;
              for(i=0;i<256;i++) {
                if (a==copie_de_adr) printf("\nHERE:\n");
                printf("%c",*a++);
              }
              printf("\n\n");
              */
                      }
              break;
            default: n=-999; printf("What did you say?\n"); break;
              
            } 
          }
        io_flush;
      } else {   // lien primaire: autoriser répertoire entier       
        if (!force_mirror) {
          if ((opt->seeker & 1)==0) {  // interdiction de descendre
            n=7;
          } else {
            n=5;   // autoriser miroir répertoires descendants (lien primaire)
          }
        } else   // forcer valeur (sub-wizard)
          n=force_mirror;
      }
      
      /* sanity check - reallocate filters HERE */
      if ((*_FILTERS_PTR) + 1 >= opt->maxfilter) {
        opt->maxfilter += HTS_FILTERSINC;
        if (filters_init(&_FILTERS, opt->maxfilter, HTS_FILTERSINC) == 0) {
          printf("PANIC! : Too many filters : >%d [%d]\n", (*_FILTERS_PTR),__LINE__);
          fflush(stdout);
          if (opt->log) {
            fprintf(opt->log,LF"Too many filters, giving up..(>%d)"LF, (*_FILTERS_PTR) );
            fprintf(opt->log,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
            test_flush;
          }
          assertf("too many filters - giving up" == NULL);    // wild..
        }
      }

      // here we have enough room for a new filter if necessary
      switch(n) {
      case -1: // sauter tout le reste
        forbidden_url=1;
        opt->wizard=2;    // sauter tout le reste
        break;
      case 0:    // interdire les mêmes liens: adr/fil
        forbidden_url=1; 
        HT_INSERT_FILTERS0;    // insérer en 0
        strcpybuff(_FILTERS[0],"-");
        strcatbuff(_FILTERS[0],jump_identification(adr));
        if (*fil!='/') strcatbuff(_FILTERS[0],"/");
        strcatbuff(_FILTERS[0],fil);
        break;
        
      case 1: // éliminer répertoire entier et sous rép: adr/path/ *
        forbidden_url=1;
        {
          size_t i = strlen(fil)-1;
          while((fil[i]!='/') && (i>0)) i--;
          if (fil[i]=='/') {
            HT_INSERT_FILTERS0;    // insérer en 0
            strcpybuff(_FILTERS[0],"-");
            strcatbuff(_FILTERS[0],jump_identification(adr));
            if (*fil!='/') strcatbuff(_FILTERS[0],"/");
            strncatbuff(_FILTERS[0] ,fil,i);
            if (_FILTERS[0][strlen(_FILTERS[0])-1]!='/') 
              strcatbuff(_FILTERS[0],"/");
            strcatbuff(_FILTERS[0],"*");
          }
        }            
        
        // ** ...
        break;
        
      case 2:    // adresse adr*
        forbidden_url=1;
        HT_INSERT_FILTERS0;    // insérer en 0                                
        strcpybuff(_FILTERS[0],"-");
        strcatbuff(_FILTERS[0],jump_identification(adr));
        strcatbuff(_FILTERS[0],"*");
        break;
        
      case 3: // ** A FAIRE
        forbidden_url=1;
        /*
        {
        int i=strlen(adr)-1;
        while((adr[i]!='/') && (i>0)) i--;
        if (i>0) {
        
          }
          
      }*/
        
        break;
        //
      case 4:    // same link
        // PAS BESOIN!!
        /*HT_INSERT_FILTERS0;    // insérer en 0                                
        strcpybuff(_FILTERS[0],"+");
        strcatbuff(_FILTERS[0],adr);
        if (*fil!='/') strcatbuff(_FILTERS[0],"/");
        strcatbuff(_FILTERS[0],fil);*/
        
        
        // étant donné le renversement wizard/primary filter (les primary autorisent up/down ET interdisent)
        // il faut éviter d'un lien isolé effectue un miroir total..
        
        *set_prio_to = 0+1;    // niveau de récursion=0 (pas de miroir)
        
        break;
        
      case 5:    // autoriser répertoire entier et fils
        if ((opt->seeker & 2)==0) {  // interdiction de monter
          size_t i = strlen(fil)-1;
          while((fil[i]!='/') && (i>0)) i--;
          if (fil[i]=='/') {
            HT_INSERT_FILTERS0;    // insérer en 0                                
            strcpybuff(_FILTERS[0],"+");
            strcatbuff(_FILTERS[0],jump_identification(adr));
            if (*fil!='/') strcatbuff(_FILTERS[0],"/");
            strncatbuff(_FILTERS[0],fil,i+1);
            strcatbuff(_FILTERS[0],"*");
          }
        } else {    // autoriser domaine alors!!
          HT_INSERT_FILTERS0;    // insérer en 0                                strcpybuff(filters[filptr],"+");
          strcpybuff(_FILTERS[0],"+");
          strcatbuff(_FILTERS[0],jump_identification(adr));
          strcatbuff(_FILTERS[0],"*");
        }
        break;
        
      case 6:    // same domain
        HT_INSERT_FILTERS0;    // insérer en 0                                strcpybuff(filters[filptr],"+");
        strcpybuff(_FILTERS[0],"+");
        strcatbuff(_FILTERS[0],jump_identification(adr));
        strcatbuff(_FILTERS[0],"*");
        break;
        //
      case 7:    // autoriser ce répertoire
        {
          size_t i = strlen(fil)-1;
          while((fil[i]!='/') && (i>0)) i--;
          if (fil[i]=='/') {
            HT_INSERT_FILTERS0;    // insérer en 0                                
            strcpybuff(_FILTERS[0],"+");
            strcatbuff(_FILTERS[0],jump_identification(adr));
            if (*fil!='/') strcatbuff(_FILTERS[0],"/");
            strncatbuff(_FILTERS[0],fil,i+1);
            strcatbuff(_FILTERS[0],"*[file]");
          }
        }
        
        break;
        
      case 50:    // on fait rien
        break;
      }  // switch 
                              
    }  // test du wizard sur l'url
  }  // fin du test wizard..

  // -------------------- PHASE 5 --------------------

  // lien non autorisé, peut-on juste le tester?
  if (just_test_it) {
    if (forbidden_url==1) {
      if (opt->travel&256) {    // tester tout de même
        if (strfield(adr,"ftp://")==0
#if HTS_USEMMS
					&& strfield(adr,"mms://")==0
#endif
					) {    // PAS ftp!
          forbidden_url=1;    // oui oui toujours interdit (note: sert à rien car ==1 mais c pour comprendre)
          *just_test_it=1;     // mais on teste
          if ((opt->debug>1) && (opt->log!=NULL)) {
            HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Testing link %s%s"LF,adr,fil);
          }
        }
      }
    }
    //adr[0]='\0';  // cancel
  }

  // -------------------- FINAL PHASE --------------------
  // Test if the "Near" test won
  if (may_set_prio_to && forbidden_url == 0) {
    *set_prio_to = may_set_prio_to;
  }

  return forbidden_url;
#undef _FILTERS
#undef _FILTERS_PTR
#undef _ROBOTS
}
示例#15
0
/*
 * Read an option file and insert the options it contains into the command
 * line, right after the program name and in the same order as in the file.
 * Note: NOT utf-8.
 *
 * Each line is lowercased, then parsed as "[set ]option[=| ]parameter".
 * Empty lines, whitespace-only lines and lines starting with '#', '/' or ';'
 * are ignored. The option is prefixed with "--" and handed to
 * optalias_check(); when that call reports a failure, the error text it
 * produced is printed to stdout and the line is dropped.
 *
 * name      : path of the option file
 * argc      : in/out argument count, updated after every insertion
 * argv      : argument vector receiving the new entries (from index 1)
 * x_argvblk : forwarded to cmdl_ins() -- presumably the storage block holding
 *             the inserted strings (TODO confirm against cmdl_ins)
 * x_ptr     : forwarded to cmdl_ins() -- presumably the current write offset
 *             inside x_argvblk (TODO confirm against cmdl_ins)
 *
 * Returns 1 when the file could be opened, 0 otherwise.
 */
int optinclude_file(const char *name, int *argc, char **argv, char *x_argvblk,
                    int *x_ptr) {
  FILE *fp;

  fp = fopen(name, "rb");
  if (fp) {
    char line[256];
    int insert_after = 1;       /* first, insert after program filename */

    while(!feof(fp)) {
      char *a, *b;
      int result;
      size_t len;

      /* read line */
      linput(fp, line, 250);
      hts_lowcase(line);

      /* skip empty lines and comment lines: # // ; */
      if (!strnotempty(line) || strchr("#/;", line[0]) != NULL)
        continue;

      /* right trim; bounded by len so that a whitespace-only line can never
         make us step before the beginning of the buffer */
      len = strlen(line);
      while(len > 0 && is_realspace(line[len - 1]))
        line[--len] = '\0';
      /* a whitespace-only line is handled like an empty one */
      if (len == 0)
        continue;

      /* jump "set " and spaces */
      a = line;
      while(is_realspace(*a))
        a++;
      if (strncmp(a, "set", 3) == 0 && is_realspace(*(a + 3))) {
        a += 4;
      }
      while(is_realspace(*a))
        a++;

      /* delete = ("sockets=8") */
      if ((b = strchr(a, '=')) != NULL)
        *b = ' ';

      /* isolate option and parameter */
      b = a;
      while(*b && !is_realspace(*b))
        b++;
      if (*b) {
        *b = '\0';
        b++;
      }
      /* a is now the option, b the parameter */

      {
        int return_argc;
        char return_error[256];
        char _tmp_argv[4][HTS_CDLMAXSIZE];
        char *tmp_argv[4];

        /* slots 0-1 carry the "--option parameter" pair given to
           optalias_check(); slots 2-3 are passed as its output area */
        tmp_argv[0] = _tmp_argv[0];
        tmp_argv[1] = _tmp_argv[1];
        tmp_argv[2] = _tmp_argv[2];
        tmp_argv[3] = _tmp_argv[3];
        strcpybuff(_tmp_argv[0], "--");
        strcatbuff(_tmp_argv[0], a);
        strcpybuff(_tmp_argv[1], b);

        result =
          optalias_check(2, (const char *const *) tmp_argv, 0, &return_argc,
                         (tmp_argv + 2), return_error);
        if (!result) {
          printf("%s\n", return_error);
        } else {
          int insert_after_argc;

          /* Insert parameters BUT so that they can be in the same order */
          /* temporary argc: Number of parameters after minus insert_after_argc */
          insert_after_argc = (*argc) - insert_after;
          cmdl_ins((tmp_argv[2]), insert_after_argc, (argv + insert_after),
                   x_argvblk, (*x_ptr));
          *argc = insert_after_argc + insert_after;
          insert_after++;
          /* Second one */
          if (return_argc > 1) {
            insert_after_argc = (*argc) - insert_after;
            cmdl_ins((tmp_argv[3]), insert_after_argc,
                     (argv + insert_after), x_argvblk, (*x_ptr));
            *argc = insert_after_argc + insert_after;
            insert_after++;
          }
        }
      }
    }
    fclose(fp);
    return 1;
  }
  return 0;
}
示例#16
0
// Builds adr and fil from a link and from the context it was taken from
// (origin_adr and origin_fil).
// [per the original note, adr and fil are 1KB buffers]
// Returns:
//  0 : ok
// -1 : error
// -2 : unsupported protocol (ftp)
int ident_url_relatif(const char *lien, const char *origin_adr,
                      const char *origin_fil,
                      lien_adrfil* const adrfil) {
  int status = 0;
  int has_scheme;
  const char *scan;

  assertf(adrfil != NULL);

  // reset both output buffers
  adrfil->adr[0] = '\0';
  adrfil->fil[0] = '\0';

  // an empty link is an error
  if (!strnotempty(lien))
    return -1;

  // a (possibly empty) run of letters followed by ':' counts as a scheme
  for (scan = lien; isalpha((unsigned char) *scan); scan++) {
  }
  has_scheme = (*scan == ':') ? 1 : 0;

  // filter out unwanted links (mailto & co)
  if (strfield(lien, "http://")
      || strfield(lien, "file://")
      || strncmp(lien, "//", 2) == 0) {
    // scheme + authority, or "//" without scheme (-> default scheme)
    if (ident_url_absolute(lien, adrfil) == -1) {
      status = -1;              // URL error
    }
  } else if (strfield(lien, "ftp://")) {
    // Note: ftp:foobar.gif is not valid
    if (!ftp_available()) {
      status = -2;              // ftp not supported
    } else if (ident_url_absolute(lien, adrfil) == -1) {
      status = -1;              // URL error
    }
#if HTS_USEOPENSSL
  } else if (strfield(lien, "https://")) {
    if (ident_url_absolute(lien, adrfil) == -1) {
      status = -1;              // URL error
    }
#endif
  } else if (has_scheme
             && !(strfield(lien, "http:")
                  || strfield(lien, "https:")
                  || strfield(lien, "ftp:"))) {
    status = -1;                // unknown scheme
  } else {
    // Relative link: the complete URL is formed from the current address
    // and, when needed, from the current path.
    const char *rel = lien;

    // sanity check: an origin is mandatory here
    if (origin_adr == NULL || origin_fil == NULL
        || *origin_adr == '\0' || *origin_fil == '\0') {
      return -1;
    }

    if ((int) strlen(origin_adr) >= HTS_URLMAXSIZE
        || (int) strlen(origin_fil) >= HTS_URLMAXSIZE
        || (int) strlen(lien) >= HTS_URLMAXSIZE) {
      status = -1;              // one of the inputs is too long
    } else {
      // address part: strip a bare scheme prefix and patch the protocol
      if (strfield(rel, "http:")) {
        rel += 5;
        // same address, empty protocol (http)
        strcpybuff(adrfil->adr, jump_protocol_const(origin_adr));
      } else if (strfield(rel, "https:")) {
        rel += 6;
        // same address, forced to https
        strcpybuff(adrfil->adr, "https://");
        strcatbuff(adrfil->adr, jump_protocol_const(origin_adr));
      } else if (strfield(rel, "ftp:")) {
        rel += 4;
        // same address, forced to ftp
        strcpybuff(adrfil->adr, "ftp://");
        strcatbuff(adrfil->adr, jump_protocol_const(origin_adr));
      } else {
        // same address, and same protocol if any
        strcpybuff(adrfil->adr, origin_adr);
      }

      // file part
      if (*rel == '/') {
        // absolute path: copied as is, then simplified
        strcatbuff(adrfil->fil, rel);
        fil_simplifie(adrfil->fil);
      } else if (*rel == '\0') {
        // nothing left: same file as the origin
        strcpybuff(adrfil->fil, origin_fil);
      } else if (*rel == '?') {
        // query only (example: a href="?page=2"): the origin file with its
        // own query removed, followed by the new query
        char *query;

        strcpybuff(adrfil->fil, origin_fil);
        query = strchr(adrfil->fil, '?');
        if (query != NULL)
          *query = '\0';
        strcatbuff(adrfil->fil, rel);
      } else {
        // path relative to the directory of the origin file: look for the
        // last '/' located before the query string (or before the end)
        const char *slash = strchr(origin_fil, '?');

        if (slash == NULL)
          slash = origin_fil + strlen(origin_fil);
        while (slash > origin_fil && *slash != '/')
          slash--;

        if (*slash != '/') {
          status = -1;          // no directory separator: URL error
        } else {
          // length of the directory prefix, trailing '/' included
          const int dir_len = ((int) (slash - origin_fil)) + 1;

          if ((dir_len + strlen(rel)) >= HTS_URLMAXSIZE) {
            status = -1;        // URL error
          } else {
            // copy the directory part
            strncpy(adrfil->fil, origin_fil, dir_len);
            adrfil->fil[dir_len] = '\0';

            // append the relative part
            if (((int) strlen(adrfil->fil) + (int) strlen(rel))
                >= HTS_URLMAXSIZE) {
              status = -1;      // error
            } else {
              strcatbuff(adrfil->fil, rel);
              // simplify the url (../ sequences)
              fil_simplifie(adrfil->fil);
            }
          }
        }
      }
    }
  }

  // the address is case insensitive: ASCII upper-case letters are lowered,
  // starting at the position returned by jump_identification()
  {
    char *c;

    for (c = jump_identification(adrfil->adr); *c != '\0'; c++) {
      if (*c >= 'A' && *c <= 'Z')
        *c += 'a' - 'A';
    }
  }

  // IDNA / RFC 3492 (Punycode) handling for HTTP(s)
  if (!link_has_authority(adrfil->adr) || strfield(adrfil->adr, "https:")) {
    char *const hostpart = jump_identification(adrfil->adr);

    // Non-ASCII characters (theoretically forbidden, but browsers are lenient)
    if (!hts_isStringAscii(hostpart, strlen(hostpart))) {
      char *const idna = hts_convertStringUTF8ToIDNA(hostpart, strlen(hostpart));

      if (idna != NULL) {
        // the converted form only replaces the original when short enough
        if (strlen(idna) < HTS_URLMAXSIZE) {
          strcpybuff(hostpart, idna);
        }
        free(idna);
      }
    }
  }

  return status;
}