// Convert an isolated file/directory name to an 8-3 or ISO9660 style name.
//   mode: 2 caps the base name at 31 characters; any other value
//         (including 1) caps it at 8 characters
//   n83:  output buffer, receives "NAME" or "NAME.EXT" (extension: 3 chars max)
//   save: input name; it is sanitized IN PLACE before being split
void longfile_to_83(int mode, char *n83, char *save) {
  int name_limit = (mode == 2) ? 31 : 8;
  char nom[256];
  char ext[256];
  char *keep_dot;
  char *p;
  int src;
  int dst;

  nom[0] = '\0';
  ext[0] = '\0';

  /* No starting . */
  if (save[0] == '.')
    save[0] = '_';

  /* No multiple dots: every dot except the last one becomes '_' */
  keep_dot = strrchr(save, '.');
  for (p = save; *p != '\0'; p++) {
    if (*p == '.' && p != keep_dot)
      *p = '_';
  }

  /* Avoid: (ISO9660, but also suitable for 8-3)
     (hint credited to a contributor in the original source)
     /:;?\#*~
     0x00-0x1f and 0x80-0xff
     Lower case is folded to upper case; anything that is not
     A-Z, 0-9, '_' or '.' is replaced by '_'. */
  for (p = save; *p != '\0'; p++) {
    const char c = *p;

    if (c >= 'a' && c <= 'z') {
      *p = (char) (c - ('a' - 'A'));
    } else if (!((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
                 || c == '_' || c == '.')) {
      *p = '_';
    }
  }

  /* Copy the base name: stop at the first dot or at the length limit */
  dst = 0;
  for (src = 0;
       dst < name_limit && save[src] != '\0' && save[src] != '.';
       src++) {
    if (save[src] != ' ')
      nom[dst++] = save[src];
  }
  nom[dst] = '\0';

  /* Something is left (a dot, or an over-long name): look for an extension */
  if (save[src] != '\0') {
    int tail = (int) strlen(save) - 1;

    /* walk back to the last '.' */
    while (tail > 0 && save[tail] != '.' && save[tail] != '/')
      tail--;

    if (save[tail] == '.') {
      int n = 0;

      for (tail++; n < 3 && save[tail] != '\0'; tail++) {
        if (save[tail] != ' ')
          ext[n++] = save[tail];
      }
      ext[n] = '\0';
    }
  }

  /* Assemble NAME[.EXT] */
  n83[0] = '\0';
  strncatbuff(n83, nom, name_limit);
  if (strnotempty(ext)) {
    strcatbuff(n83, ".");
    strncatbuff(n83, ext, 3);
  }
}
// supercomparateur joker (tm) // compare a et b (b=avec joker dedans), case insensitive [voir CI] // renvoi l'adresse de la première lettre de la chaine // (càd *[..]toto.. renvoi adresse de toto dans la chaine) // accepte les délires du genre www.*.*/ * / * truc*.* // cet algo est 'un peu' récursif mais ne consomme pas trop de tm // * = toute lettre // --?-- : spécifique à HTTrack et aux ? HTS_INLINE const char *strjoker(const char *chaine, const char *joker, LLint * size, int *size_flag) { //int err=0; if (strnotempty(joker) == 0) { // fin de chaine joker if (strnotempty(chaine) == 0) // fin aussi pour la chaine: ok return chaine; else if (chaine[0] == '?') return chaine; // --?-- pour les index.html?Choix=2 else return NULL; // non trouvé } // on va progresser en suivant les 'mots' contenus dans le joker // un mot peut être un * ou bien toute autre séquence de lettres if (strcmp(joker, "*") == 0) { // ok, rien après return chaine; } // 1er cas: jokers * ou jokers multiples *[..] if (joker[0] == '*') { // comparer joker+reste (*toto/..) 
int jmp; // nombre de caractères pour le prochain mot dans joker int cut = 0; // interdire tout caractère superflu char pass[256]; char LEFT = '[', RIGHT = ']'; int unique = 0; switch (joker[1]) { case '[': LEFT = '['; RIGHT = ']'; unique = 0; break; case '(': LEFT = '('; RIGHT = ')'; unique = 1; break; } if ((joker[1] == LEFT) && (joker[2] != LEFT)) { // multijoker (tm) int i; for(i = 0; i < 256; i++) pass[i] = 0; // noms réservés if ((strfield(joker + 2, "file")) || (strfield(joker + 2, "name"))) { for(i = 0; i < 256; i++) pass[i] = 1; pass[(int) '?'] = 0; //pass[(int) ';'] = 0; pass[(int) '/'] = 0; i = 2; { int len = (int) strlen(joker); while((joker[i] != RIGHT) && (joker[i]) && (i < len)) i++; } } else if (strfield(joker + 2, "path")) { for(i = 0; i < 256; i++) pass[i] = 1; pass[(int) '?'] = 0; //pass[(int) ';'] = 0; i = 2; { int len = (int) strlen(joker); while((joker[i] != RIGHT) && (joker[i]) && (i < len)) i++; } } else if (strfield(joker + 2, "param")) { if (chaine[0] == '?') { // il y a un paramètre juste là for(i = 0; i < 256; i++) pass[i] = 1; } // sinon synonyme de 'rien' i = 2; { int len = (int) strlen(joker); while((joker[i] != RIGHT) && (joker[i]) && (i < len)) i++; } } else { // décode les directives comme *[A-Z,âêîôû,0-9] i = 2; if (joker[i] == RIGHT) { // *[] signifie "plus rien après" cut = 1; // caractère supplémentaire interdit } else { int len = (int) strlen(joker); while((joker[i] != RIGHT) && (joker[i]) && (i < len)) { if ((joker[i] == '<') || (joker[i] == '>')) { // *[<10] int lsize = 0; int lverdict; i++; if (sscanf(joker + i, "%d", &lsize) == 1) { if (size) { if (*size >= 0) { if (size_flag) *size_flag = 1; /* a joué */ if (joker[i - 1] == '<') lverdict = (*size < lsize); else lverdict = (*size > lsize); if (!lverdict) { return NULL; // ne correspond pas } else { *size = lsize; return chaine; // ok } } else return NULL; // ne correspond pas } else return NULL; // ne correspond pas (test impossible) // jump while(isdigit((unsigned char) 
joker[i])) i++; } } else if (joker[i + 1] == '-') { // 2 car, ex: *[A-Z] if ((int) (unsigned char) joker[i + 2] > (int) (unsigned char) joker[i]) { int j; for(j = (int) (unsigned char) joker[i]; j <= (int) (unsigned char) joker[i + 2]; j++) pass[j] = 1; } // else err=1; i += 3; } else { // 1 car, ex: *[ ] if (joker[i + 2] == '\\' && joker[i + 3] != 0) { // escaped char, such as *[\[] or *[\]] i++; } pass[(int) (unsigned char) joker[i]] = 1; i++; } if ((joker[i] == ',') || (joker[i] == ';')) i++; } } } // à sauter dans joker jmp = i; if (joker[i]) jmp++; // } else { // tout autoriser // int i; for(i = 0; i < 256; i++) pass[i] = 1; // tout autoriser jmp = 1; ////if (joker[2]==LEFT) jmp=3; // permet de recher *<crochet ouvrant> } { int i, max; const char *adr; // la chaine doit se terminer exactement if (cut) { if (strnotempty(chaine)) return NULL; // perdu else return chaine; // ok } // comparaison en boucle, c'est ca qui consomme huhu.. // le tableau pass[256] indique les caractères ASCII autorisés // tester sans le joker (pas ()+ mais ()*) if (!unique) { if ((adr = strjoker(chaine, joker + jmp, size, size_flag))) { return adr; } } // tester i = 0; if (!unique) max = (int) strlen(chaine); else /* *(a) only match a (not aaaaa) */ max = 1; while(i < (int) max) { if (pass[(int) (unsigned char) chaine[i]]) { // caractère autorisé if ((adr = strjoker(chaine + i + 1, joker + jmp, size, size_flag))) { return adr; } i++; } else i = max + 2; // sortir } // tester chaîne vide if (i != max + 2) // avant c'est ok if ((adr = strjoker(chaine + max, joker + jmp, size, size_flag))) return adr; return NULL; // perdu } } else { // comparer mot+reste (toto*..) if (strnotempty(chaine)) { int jmp = 0, ok = 1; // comparer début de joker et début de chaine while((joker[jmp] != '*') && (joker[jmp]) && (ok)) { // CI : remplacer streql par une comparaison != if (!streql(chaine[jmp], joker[jmp])) { ok = 0; // quitter } jmp++; } // comparaison ok? if (ok) { // continuer la comparaison. 
if (strjoker(chaine + jmp, joker + jmp, size, size_flag)) return chaine; // retourner 1e lettre } } // strlen(a) return NULL; } // * ou mot return NULL; }
// forme à partir d'un lien et du contexte (origin_fil et origin_adr d'où il est tiré) adr et fil // [adr et fil sont des buffers de 1ko] // 0 : ok // -1 : erreur // -2 : protocole non supporté (ftp) int ident_url_relatif(const char *lien,const char* origin_adr,const char* origin_fil,char* adr,char* fil) { int ok=0; int scheme=0; adr[0]='\0'; fil[0]='\0'; //effacer buffers // lien non vide! if (strnotempty(lien)==0) return -1; // erreur! // Scheme? { const char* a=lien; while (isalpha((unsigned char)*a)) a++; if (*a == ':') scheme=1; } // filtrer les parazites (mailto & cie) // scheme+authority (//) if ( (strfield(lien,"http://")) // scheme+// || (strfield(lien,"file://")) // scheme+// || (strncmp(lien,"//",2)==0) // // sans scheme (-> default) ) { if (ident_url_absolute(lien,adr,fil)==-1) { ok=-1; // erreur URL } } else if (strfield(lien,"ftp://")) { // Note: ftp:foobar.gif is not valid if (ftp_available()) { // ftp supporté if (ident_url_absolute(lien,adr,fil)==-1) { ok=-1; // erreur URL } } else { ok=-2; // non supporté } #if HTS_USEMMS } else if (strfield(lien,"mms://")) { if (ident_url_absolute(lien,adr,fil)==-1) { ok=-1; // erreur URL } #endif #if HTS_USEOPENSSL } else if (strfield(lien,"https://")) { if (SSL_is_available) { // Note: ftp:foobar.gif is not valid if (ident_url_absolute(lien,adr,fil)==-1) { ok=-1; // erreur URL } } else { ok=-1; } #endif } else if ((scheme) && ( (!strfield(lien,"http:")) && (!strfield(lien,"https:")) && (!strfield(lien,"ftp:")) #if HTS_USEMMS && (!strfield(lien,"mms:")) #endif )) { ok=-1; // unknown scheme } else { // c'est un lien relatif // On forme l'URL complète à partie de l'url actuelle // et du chemin actuel si besoin est. 
// copier adresse if (((int) strlen(origin_adr)<HTS_URLMAXSIZE) && ((int) strlen(origin_fil)<HTS_URLMAXSIZE) && ((int) strlen(lien)<HTS_URLMAXSIZE)) { /* patch scheme if necessary */ if (strfield(lien,"http:")) { lien+=5; strcpybuff(adr, jump_protocol(origin_adr)); // même adresse ; protocole vide (http) } else if (strfield(lien,"https:")) { lien+=6; strcpybuff(adr, "https://"); // même adresse forcée en https strcatbuff(adr, jump_protocol(origin_adr)); } else if (strfield(lien,"ftp:")) { lien+=4; strcpybuff(adr, "ftp://"); // même adresse forcée en ftp strcatbuff(adr, jump_protocol(origin_adr)); #if HTS_USEMMS } else if (strfield(lien,"mms:")) { lien+=4; strcpybuff(adr, "mms://"); // même adresse forcée en ftp strcatbuff(adr, jump_protocol(origin_adr)); #endif } else { strcpybuff(adr,origin_adr); // même adresse ; et même éventuel protocole } if (*lien!='/') { // sinon c'est un lien absolu if (*lien == '\0') { strcpybuff(fil,origin_fil); } else if (*lien == '?') { // example: a href="?page=2" char* a; strcpybuff(fil,origin_fil); a=strchr(fil,'?'); if (a) *a='\0'; strcatbuff(fil,lien); } else { const char *a=strchr(origin_fil,'?'); if (a == NULL) a=origin_fil+strlen(origin_fil); while((*a!='/') && ( a > origin_fil) ) a--; if (*a=='/') { // ok on a un '/' if ( (((int) (a - origin_fil))+1+strlen(lien)) < HTS_URLMAXSIZE) { // copier chemin strncpy(fil,origin_fil,((int) (a - origin_fil))+1); *(fil + ((int) (a - origin_fil))+1)='\0'; // copier chemin relatif if (((int) strlen(fil)+(int) strlen(lien)) < HTS_URLMAXSIZE) { strcatbuff(fil,lien + ((*lien=='/')?1:0) ); // simplifier url pour les ../ fil_simplifie(fil); } else ok=-1; // erreur } else { // erreur ok=-1; // erreur URL } } else { // erreur ok=-1; // erreur URL } } } else { // chemin absolu // copier chemin directement strcatbuff(fil,lien); fil_simplifie(fil); } // *lien!='/' } else ok=-1; } // test news: etc. 
// case insensitive pour adresse { char *a=jump_identification(adr); while(*a) { if ((*a>='A') && (*a<='Z')) *a+='a'-'A'; a++; } } return ok; }
/* Console progress display, called on each loop of HTTrack.
   Remembers the latest known counters in the static SInfo, and redraws the
   status screen when more than 100 time units (as returned by mtime_local();
   presumably milliseconds -- TODO confirm) have elapsed since the last
   redraw, or when the clock went backwards.
   back/back_max/back_index describe the table of background transfers; the
   per-transfer list is only drawn when back_index >= 0.
   Always returns 1. Does nothing beyond reading stats when use_show is 0. */
static int __cdecl htsshow_loop(t_hts_callbackarg *carg, httrackp *opt, lien_back* back, int back_max, int back_index, int lien_n, int lien_tot, int stat_time, hts_stat_struct* stats) {
  static TStamp prev_mytime = 0; /* time of the last redraw */
  static t_InpInfo SInfo;        /* last known values, kept across calls */
  //
  TStamp mytime;
  long int rate = 0;
  char st[256];
  //
  // -1 means "no information" for all the values below
  int stat_written = -1;
  int stat_updated = -1;
  int stat_errors = -1;
  int stat_warnings = -1;
  int stat_infos = -1;
  int nbk = -1;
  LLint nb = -1;
  int stat_nsocket = -1;
  LLint stat_bytes = -1;
  LLint stat_bytes_recv = -1;
  int irate = -1;
  if (stats) {
    stat_written = stats->stat_files;
    stat_updated = stats->stat_updated_files;
    stat_errors = stats->stat_errors;
    stat_warnings = stats->stat_warnings;
    stat_infos = stats->stat_infos;
    nbk = stats->nbk;
    stat_nsocket = stats->stat_nsocket;
    irate = (int) stats->rate;
    nb = stats->nb;
    stat_bytes = stats->nb;
    stat_bytes_recv = stats->HTS_TOTAL_RECV;
  }

  if (!use_show)
    return 1;

  mytime = mtime_local();
  // average rate: bytes received divided by stat_time
  if ((stat_time > 0) && (stat_bytes_recv > 0))
    rate = (int) (stat_bytes_recv / stat_time);
  else
    rate = 0;    // no information

  /* Infos: only overwrite the remembered values with known (>= 0) ones */
  if (stat_bytes >= 0) SInfo.stat_bytes = stat_bytes;      // bytes
  if (stat_time >= 0) SInfo.stat_time = stat_time;         // time
  if (lien_tot >= 0) SInfo.lien_tot = lien_tot;            // number of links
  if (lien_n >= 0) SInfo.lien_n = lien_n;                  // scanned
  SInfo.stat_nsocket = stat_nsocket;                       // sockets
  if (rate > 0) SInfo.rate = rate;                         // rate
  if (irate >= 0) SInfo.irate = irate;                     // instantaneous rate
  if (SInfo.irate < 0) SInfo.irate = SInfo.rate;
  if (nbk >= 0) SInfo.stat_back = nbk;
  if (stat_written >= 0) SInfo.stat_written = stat_written;
  if (stat_updated >= 0) SInfo.stat_updated = stat_updated;
  if (stat_errors >= 0) SInfo.stat_errors = stat_errors;
  if (stat_warnings >= 0) SInfo.stat_warnings = stat_warnings;
  if (stat_infos >= 0) SInfo.stat_infos = stat_infos;

  // redraw throttle (also redraw if the clock went backwards)
  if (((mytime - prev_mytime) > 100) || ((mytime - prev_mytime) < 0)) {
    strc_int2bytes2 strc, strc2, strc3;

    prev_mytime = mytime;

    st[0] = '\0';
    qsec2str(st, stat_time);
    vt_home();
    // four-line, two-column summary block
    printf(
      VT_GOTOXY("1","1")
      VT_CLREOL
      STYLE_STATTEXT "Bytes saved:"
      STYLE_STATVALUES " \t%s" "\t"
      VT_CLREOL
      VT_GOTOXY("40","1")
      STYLE_STATTEXT "Links scanned:"
      STYLE_STATVALUES " \t%d/%d (+%d)"
      VT_CLREOL"\n"VT_CLREOL
      VT_GOTOXY("1","2")
      STYLE_STATTEXT "Time:"
      " \t"
      STYLE_STATVALUES "%s"
      "\t"
      VT_CLREOL
      VT_GOTOXY("40","2")
      STYLE_STATTEXT "Files written:"
      " \t"
      STYLE_STATVALUES "%d"
      VT_CLREOL"\n"VT_CLREOL
      VT_GOTOXY("1","3")
      STYLE_STATTEXT "Transfer rate:"
      " \t"
      STYLE_STATVALUES "%s (%s)"
      "\t"
      VT_CLREOL
      VT_GOTOXY("40","3")
      STYLE_STATTEXT "Files updated:"
      " \t"
      STYLE_STATVALUES "%d"
      VT_CLREOL"\n"VT_CLREOL
      VT_GOTOXY("1","4")
      STYLE_STATTEXT "Active connections:"
      " \t"
      STYLE_STATVALUES "%d"
      "\t"
      VT_CLREOL
      VT_GOTOXY("40","4")
      STYLE_STATTEXT "Errors:"
      STYLE_STATVALUES " \t"
      STYLE_STATVALUES "%d"
      VT_CLREOL"\n"
      STYLE_STATRESET
      ,
      /* */
      (char*) int2bytes(&strc, SInfo.stat_bytes),
      (int) lien_n, (int) SInfo.lien_tot, (int) nbk,
      (char*) st,
      (int) SInfo.stat_written,
      (char*) int2bytessec(&strc2, SInfo.irate), (char*) int2bytessec(&strc3, SInfo.rate),
      (int) SInfo.stat_updated,
      (int) SInfo.stat_nsocket,
      (int) SInfo.stat_errors
      /* */
      );

    // walk the table of background links
    if (back_index >= 0) {    // only if an index was given
      int j, k;
      int index = 0;          // next free slot in StatsBuffer
      int ok = 0;             // 1 when the current entry is to be displayed
      int l;                  // length of the displayed name
      //
      t_StatsBuffer StatsBuffer[NStatsBuffer];

      // clear all display slots
      {
        int i;
        for (i = 0; i < NStatsBuffer; i++) {
          strcpybuff(StatsBuffer[i].state,"");
          strcpybuff(StatsBuffer[i].name,"");
          strcpybuff(StatsBuffer[i].file,"");
          strcpybuff(StatsBuffer[i].url_sav,"");
          StatsBuffer[i].back = 0;
          StatsBuffer[i].size = 0;
          StatsBuffer[i].sizetot = 0;
        }
      }
      // Slots are filled in priority order: first the current link (k == 0),
      // then the others (k == 1); within each, j selects the state class
      // (0: receiving, 1: request/connect/dns/ftp, 2: finished).
      for (k = 0; k < 2; k++) {    // 0: current link 1: other links
        for (j = 0; (j < 3) && (index < NStatsBuffer); j++) {  // priority pass
          int _i;
          for (_i = 0 + k; (_i < max(back_max*k,1)) && (index < NStatsBuffer); _i++) {  // link number
            int i = (back_index + _i) % back_max;    // start with the "first" one (the current one)
            if (back[i].status >= 0) {     // means "active link"
              // (ok is declared once outside the loops, as an optimization)
              ok = 0;
              switch (j) {
              case 0:     // highest priority: transfer in progress
                if ((back[i].status > 0) && (back[i].status < 99)) {
                  strcpybuff(StatsBuffer[index].state,"receive"); ok = 1;
                }
                break;
              case 1:
                if (back[i].status == STATUS_WAIT_HEADERS) {
                  strcpybuff(StatsBuffer[index].state,"request"); ok = 1;
                }
                else if (back[i].status == STATUS_CONNECTING) {
                  strcpybuff(StatsBuffer[index].state,"connect"); ok = 1;
                }
                else if (back[i].status == STATUS_WAIT_DNS) {
                  strcpybuff(StatsBuffer[index].state,"search"); ok = 1;
                }
                else if (back[i].status == STATUS_FTP_TRANSFER) {    // ftp transfer: show its info text
                  sprintf(StatsBuffer[index].state, "ftp: %s", back[i].info);
                  ok = 1;
                }
                break;
              default:
                if (back[i].status == STATUS_READY) {  // finished
                  if ((back[i].r.statuscode == 200)) {
                    strcpybuff(StatsBuffer[index].state,"ready");
                    ok = 1;
                  }
                  else if ((back[i].r.statuscode >= 100) && (back[i].r.statuscode <= 599)) {
                    // other status code in 100..599: show its description
                    char tempo[256]; tempo[0] = '\0';
                    infostatuscode(tempo, back[i].r.statuscode);
                    strcpybuff(StatsBuffer[index].state,tempo);
                    ok = 1;
                  }
                  else {
                    strcpybuff(StatsBuffer[index].state,"error");
                    ok = 1;
                  }
                }
                break;
              }

              if (ok) {
                char BIGSTK s[HTS_URLMAXSIZE * 2];
                //
                StatsBuffer[index].back = i;        // index, for more information
                //
                s[0] = '\0';
                strcpybuff(StatsBuffer[index].url_sav,back[i].url_sav);   // for cancel
                // s = host + '/' + path ("localhost" for the file:// address)
                if (strcmp(back[i].url_adr, "file://"))
                  strcatbuff(s,back[i].url_adr);
                else
                  strcatbuff(s,"localhost");
                if (back[i].url_fil[0] != '/')
                  strcatbuff(s,"/");
                strcatbuff(s,back[i].url_fil);

                // split s at its last '/': .file gets the tail, s keeps the head
                StatsBuffer[index].file[0] = '\0';
                {
                  char* a = strrchr(s, '/');
                  if (a) {
                    strncatbuff(StatsBuffer[index].file,a,200);
                    *a = '\0';
                  }
                }

                if ((l = (int) strlen(s)) < MAX_LEN_INPROGRESS)
                  strcpybuff(StatsBuffer[index].name,s);
                else {
                  // too long: keep the beginning and the end, "..." in between
                  StatsBuffer[index].name[0] = '\0';
                  strncatbuff(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2);
                  strcatbuff(StatsBuffer[index].name,"...");
                  strcatbuff(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2);
                }

                if (back[i].r.totalsize > 0) {  // known total size
                  StatsBuffer[index].sizetot = back[i].r.totalsize;
                  StatsBuffer[index].size = back[i].r.size;
                } else {  // unknown total size
                  if (back[i].status == STATUS_READY) {  // finished
                    StatsBuffer[index].sizetot = back[i].r.size;
                    StatsBuffer[index].size = back[i].r.size;
                  } else {
                    // still running: show progress modulo 8192 out of 8192
                    StatsBuffer[index].sizetot = 8192;
                    StatsBuffer[index].size = (back[i].r.size % 8192);
                  }
                }
                index++;
              }
            }
          }
        }
      }

      /* LF */
      printf("%s\n", VT_CLREOL);

      /* Display current job */
      {
        int parsing = 0;
        printf("Current job: ");
        if (!(parsing = hts_is_parsing(opt, -1)))
          printf("receiving files");
        else {
          switch (hts_is_testing(opt)) {
          case 0:
            printf("parsing HTML file (%d%%)", parsing);
            break;
          case 1:
            printf("parsing HTML file: testing links (%d%%)", parsing);
            break;
          case 2:
            printf("purging files");
            break;
          case 3:
            printf("loading cache");
            break;
          case 4:
            printf("waiting (scheduler)");
            break;
          case 5:
            printf("waiting (throttle)");
            break;
          }
        }
        printf("%s\n", VT_CLREOL);
      }

      /* Display background jobs (one line per slot, blank if unused) */
      {
        int i;
        for (i = 0; i < NStatsBuffer; i++) {
          if (strnotempty(StatsBuffer[i].state)) {
            printf(VT_CLREOL" %s - \t%s%s \t%s / \t%s",
                   StatsBuffer[i].state,
                   StatsBuffer[i].name,
                   StatsBuffer[i].file,
                   int2bytes(&strc, StatsBuffer[i].size),
                   int2bytes(&strc2, StatsBuffer[i].sizetot));
          }
          printf("%s\n", VT_CLREOL);
        }
      }
    }
  }
  return 1;
}
static void arg_init(struct hts_proj *proj) { char * a; char cmd[4096] = { 0x00 }; char str[256] = {0x00}; proj->argc = 1; if (proj->cache == -1) proj->cache = 2; //cache default is best chose if (proj->conns == -1) proj->conns = 10; //10 limits the number of connections per second if (proj->timeout == -1) proj->timeout = 10; if (strnotempty(proj->name) == 0) proj->errorno = 1; if (strnotempty(proj->urls) == 0) proj->errorno = 2; while ((a = strchr(proj->urls, ','))) *a = ' '; while ((a = strchr(proj->urls, '\t'))) *a = ' '; strcat(cmd, proj->urls); strcat(cmd, " "); //connections limit sprintf(str, "-%%c%d ", proj->conns); strcat(cmd, str); //timeout sprintf(str, "-T%d ", proj->timeout); strcat(cmd, str); if (strnotempty(proj->savepath) == 0) { strcat(proj->savepath, getenv("HOME")); strcat(proj->savepath, "/websites/"); } if (strnotempty(proj->savepath)){ if ((proj->savepath[strlen(proj->savepath)-1]!='/') && (proj->savepath[strlen(proj->savepath)-1]!='\\')) { strcat(proj->savepath, "/"); } } strcat(cmd, "-q "); if (strnotempty(proj->linklist)) { strcat(cmd, "-%L"); strcat(cmd, proj->linklist); strcat(cmd, " "); } sprintf(str, "-C%d ", proj->cache); strcat(cmd, str); switch (proj->action) { case ACTION_ALL_SITES: strcat(cmd, "-w "); break; case ACTION_ONLY_FILES: strcat(cmd, "-g "); break; case ACTION_WITH_CACHE: strcat(cmd, "-i "); break; case ACTION_FIST_LEVEL: strcat(cmd, "-Y "); break; } strcat(cmd," --path \""); strcat(cmd, proj->savepath); strcat(cmd, proj->name); strcat(cmd, "\" "); sprintf(str, "-p%d ", proj->priority); strcat(cmd, str); if (strnotempty(proj->proxy_port) == 0) { sprintf(proj->proxy_port,"%s", "8080"); } if (proj->retries) { sprintf(str, "-R%d ", proj->retries); strcat(cmd, str); } if (strnotempty(proj->proxy)) { strcat(cmd, "--proxy "); strcat(cmd, proj->proxy); strcat(cmd, ":"); strcat(cmd, proj->proxy_port); } if (proj->depth > 0) { sprintf(str,"-r%d ", proj->depth); strcat(cmd, str); } proj->argv[0] = "httrack"; proj->argc++; 
proj->argv[1] = cmd; fprintf(stderr, "%s\n", cmd); int i = 0; int g = 0; while (cmd[i]) { if (cmd[i] == '\"') g = !g; if (cmd[i] == ' ') { if (!g) { cmd[i] = '\0'; proj->argv[proj->argc++] = cmd + i + 1; } } i++; } }
// lire cookies.txt // lire également (Windows seulement) les *@*.txt (cookies IE copiés) // !=0 : erreur int cookie_load(t_cookie * cookie, const char *fpath, const char *name) { char catbuff[CATBUFF_SIZE]; char buffer[8192]; // cookie->data[0]='\0'; // Fusionner d'abord les éventuels cookies IE #ifdef _WIN32 { WIN32_FIND_DATAA find; HANDLE h; char pth[MAX_PATH + 32]; strcpybuff(pth, fpath); strcatbuff(pth, "*@*.txt"); h = FindFirstFileA((char *) pth, &find); if (h != INVALID_HANDLE_VALUE) { do { if (!(find.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) if (!(find.dwFileAttributes & FILE_ATTRIBUTE_SYSTEM)) { FILE *fp = fopen(fconcat(catbuff, fpath, find.cFileName), "rb"); if (fp) { char cook_name[256]; char cook_value[1000]; char domainpathpath[512]; char dummy[512]; // char domain[256]; // domaine cookie (.netscape.com) char path[256]; // chemin (/) int cookie_merged = 0; // // Read all cookies while(!feof(fp)) { cook_name[0] = cook_value[0] = domainpathpath[0] = dummy[0] = domain[0] = path[0] = '\0'; linput(fp, cook_name, 250); if (!feof(fp)) { linput(fp, cook_value, 250); if (!feof(fp)) { int i; linput(fp, domainpathpath, 500); /* Read 6 other useless values */ for(i = 0; !feof(fp) && i < 6; i++) { linput(fp, dummy, 500); } if (strnotempty(cook_name) && strnotempty(cook_value) && strnotempty(domainpathpath)) { if (ident_url_absolute(domainpathpath, domain, path) >= 0) { cookie_add(cookie, cook_name, cook_value, domain, path); cookie_merged = 1; } } } } } fclose(fp); if (cookie_merged) remove(fconcat(catbuff, fpath, find.cFileName)); } // if fp } } while(FindNextFileA(h, &find)); FindClose(h); } } #endif // Ensuite, cookies.txt { FILE *fp = fopen(fconcat(catbuff, fpath, name), "rb"); if (fp) { char BIGSTK line[8192]; while((!feof(fp)) && (((int) strlen(cookie->data)) < cookie->max_len)) { rawlinput(fp, line, 8100); if (strnotempty(line)) { if (strlen(line) < 8000) { if (line[0] != '#') { char domain[256]; // domaine cookie (.netscape.com) char path[256]; // 
chemin (/) char cook_name[1024]; // nom cookie (MYCOOK) char BIGSTK cook_value[8192]; // valeur (ID=toto,S=1234) strcpybuff(domain, cookie_get(buffer, line, 0)); // host strcpybuff(path, cookie_get(buffer, line, 2)); // path strcpybuff(cook_name, cookie_get(buffer, line, 5)); // name strcpybuff(cook_value, cookie_get(buffer, line, 6)); // value #if DEBUG_COOK printf("%s\n", line); #endif cookie_add(cookie, cook_name, cook_value, domain, path); } } } } fclose(fp); return 0; } } return -1; }
// catch_url // returns 0 if error // url: buffer where URL must be stored - or ip:port in case of failure // data: 32Kb HTSEXT_API int catch_url(T_SOC soc, char *url, char *method, char *data) { int retour = 0; // connexion (accept) if (soc != INVALID_SOCKET) { T_SOC soc2; while((soc2 = (T_SOC) accept(soc, NULL, NULL)) == INVALID_SOCKET) ; /* #ifdef _WIN32 closesocket(soc); #else close(soc); #endif */ soc = soc2; /* INFOS */ { SOCaddr server2; SOClen len = SOCaddr_capacity(server2); if (getpeername(soc, &SOCaddr_sockaddr(server2), &len) == 0) { char dot[256 + 2]; SOCaddr_inetntoa(dot, sizeof(dot), server2); sprintf(url, "%s:%d", dot, ntohs(SOCaddr_sinport(server2))); } } /* INFOS */ // réception if (soc != INVALID_SOCKET) { char line[1000]; char protocol[256]; line[0] = protocol[0] = '\0'; // socinput(soc, line, 1000); if (strnotempty(line)) { if (sscanf(line, "%s %s %s", method, url, protocol) == 3) { lien_adrfil af; // méthode en majuscule size_t i; int r = 0; af.adr[0] = af.fil[0] = '\0'; // for(i = 0; method[i] != '\0'; i++) { if ((method[i] >= 'a') && (method[i] <= 'z')) method[i] -= ('a' - 'A'); } // adresse du lien if (ident_url_absolute(url, &af) >= 0) { // Traitement des en-têtes char BIGSTK loc[HTS_URLMAXSIZE * 2]; htsblk blkretour; hts_init_htsblk(&blkretour); //memset(&blkretour, 0, sizeof(htsblk)); // effacer blkretour.location = loc; // si non nul, contiendra l'adresse véritable en cas de moved xx // Lire en têtes restants sprintf(data, "%s %s %s\r\n", method, af.fil, protocol); while(strnotempty(line)) { socinput(soc, line, 1000); treathead(NULL, NULL, NULL, &blkretour, line); // traiter strcatbuff(data, line); strcatbuff(data, "\r\n"); } // CR/LF final de l'en tête inutile car déja placé via la ligne vide juste au dessus //strcatbuff(data,"\r\n"); if (blkretour.totalsize > 0) { int len = (int) min(blkretour.totalsize, 32000); int pos = (int) strlen(data); // Copier le reste (post éventuel) while((len > 0) && ((r = recv(soc, (char *) data + pos, 
len, 0)) > 0)) { pos += r; len -= r; data[pos] = '\0'; // terminer par NULL } } // Envoyer page sprintf(line, CATCH_RESPONSE); send(soc, line, (int) strlen(line), 0); // OK! retour = 1; } } } // sinon erreur } } if (soc != INVALID_SOCKET) { #ifdef _WIN32 closesocket(soc); /* WSACleanup(); */ #else close(soc); #endif } return retour; }
/* "parse" callback for Java .class files.
   When str->wrapper_name does not designate this module (libName), the call
   is forwarded to the previous callback in the chain, if any.
   Otherwise, if detect_mime() accepts the file, str->filename is opened, the
   10-byte header is checked (magic 0xCAFEBABE), header.count - 1 table
   entries are read with readtable(), and a "<name>.class" link is added
   through str->addLink() for every HTS_CLASS entry whose name is neither
   this class, its super class, an array type ('['), nor contains "java/".
   Returns 1 on success, 0 on error (str->err_msg then holds a message). */
static int hts_parse_java(t_hts_callbackarg * carg, httrackp * opt,
                          htsmoduleStruct * str) {
  /* The wrapper_name member has changed: not for us anymore */
  if (str->wrapper_name == NULL || strcmp(str->wrapper_name, libName) != 0) {
    /* Call parent functions if multiple callbacks are chained. */
    if (CALLBACKARG_PREV_FUN(carg, parse) != NULL) {
      return CALLBACKARG_PREV_FUN(carg, parse) (CALLBACKARG_PREV_CARG(carg),
                                                opt, str);
    }
    strcpy(str->err_msg,
           "unexpected error: bad wrapper_name and no previous wrapper");
    return 0;                   /* Unexpected error */
  } else {
    if (detect_mime(str)) {
      /* (Legacy code) */
      char catbuff[CATBUFF_SIZE];
      FILE *fpout;
      JAVA_HEADER header;
      RESP_STRUCT *tab;
      const char *file = str->filename;

      str->relativeToHtmlLink = 1;

#if JAVADEBUG
      printf("fopen\n");
#endif
      if ((fpout =
           FOPEN(fconv(catbuff, sizeof(catbuff), file), "r+b")) == NULL) {
        sprintf(str->err_msg, "Unable to open file %s", file);
        return 0;               // error
      }
#if JAVADEBUG
      printf("fread\n");
#endif
      /* only the first 10 bytes of the header are read from the file */
      if (fread(&header, 1, 10, fpout) != 10) { // incomplete header
        fclose(fpout);
        sprintf(str->err_msg, "File header too small (file len = " LLintP ")",
                (LLint) fsize(file));
        return 0;
      }
#if JAVADEBUG
      printf("header\n");
#endif
      // check the header (fields are byte-swapped first when
      // reverse_endian() says so)
      if (reverse_endian()) {
        header.magic = hts_swap32(header.magic);
        header.count = hts_swap16(header.count);
      }
      if (header.magic != 0xCAFEBABE) {
        sprintf(str->err_msg, "non java file");
        if (fpout) {
          fclose(fpout);
          fpout = NULL;
        }
        return 0;
      }

      /* Allocate at least one entry: calloc(0, ...) may legitimately return
         NULL, which must not be reported as an out-of-memory condition. */
      tab = (RESP_STRUCT *) calloc(header.count > 0 ? header.count : 1,
                                   sizeof(RESP_STRUCT));
      if (!tab) {
        /* report the size that was actually requested */
        sprintf(str->err_msg, "Unable to alloc %d bytes",
                (int) ((header.count > 0 ? header.count : 1)
                       * sizeof(RESP_STRUCT)));
        if (fpout) {
          fclose(fpout);
          fpout = NULL;
        }
        return 0;               // error
      }
#if JAVADEBUG
      printf("calchead\n");
#endif
      {
        int i;

        /* entries are numbered from 1 */
        for(i = 1; i < header.count; i++) {
          int err = 0;

          tab[i] = readtable(str, fpout, tab[i], &err);
          if (!err) {
            /* long and double entries take two slots */
            if ((tab[i].type == HTS_LONG) || (tab[i].type == HTS_DOUBLE))
              i++;
          } else {              // an error occurred
            if (strnotempty(str->err_msg) == 0)
              strcpy(str->err_msg, "Internal readtable error");
            free(tab);
            if (fpout) {
              fclose(fpout);
              fpout = NULL;
            }
            return 0;
          }
        }

      }

#if JAVADEBUG
      printf("addfiles\n");
#endif
      {
        unsigned int Class;
        unsigned int SClass;
        int i;

        /* indexes of this class and of its super class */
        Class = readshort(fpout);
        SClass = readshort(fpout);

        for(i = 1; i < header.count; i++) {

          if (tab[i].type == HTS_CLASS) {

            if ((tab[i].index1 < header.count) && (tab[i].index1 >= 0)) {

              if ((tab[i].index1 != SClass) && (tab[i].index1 != Class)
                  && (tab[tab[i].index1].name[0] != '[')) {

                if (!strstr(tab[tab[i].index1].name, "java/")) {
                  char BIGSTK tempo[1024];

                  tempo[0] = '\0';

                  /* bounded: the name comes from the file being parsed and
                     ".class" is appended to it */
                  snprintf(tempo, sizeof(tempo), "%s.class",
                           tab[tab[i].index1].name);
#if JAVADEBUG
                  printf("add %s\n", tempo);
#endif
                  if (tab[tab[i].index1].file_position >= 0)
                    str->addLink(str, tempo);   /* tab[tab[i].index1].file_position */
                }

              }
            } else {
              /* out-of-range reference: stop scanning */
              i = header.count; // exit
            }
          }

        }
      }

#if JAVADEBUG
      printf("end\n");
#endif
      free(tab);
      if (fpout) {
        fclose(fpout);
        fpout = NULL;
      }
      return 1;

    } else {
      strcpy(str->err_msg, "bad MIME type");
    }
  }
  return 0;                     /* Error */
}
/* Interactive command-line wizard.
   Asks for a project name, a base path, URLs, an action, a proxy, wildcards
   and additional options on stdin, shows the resulting httrack command line,
   and after confirmation splits it into arguments and runs hts_main().
   Every exit path (allocation failure, action 0, launch declined, normal
   end) goes through the same cleanup code, so all work buffers are released. */
void help_wizard(httrackp* opt) {
  char* urls = (char*) malloct(HTS_URLMAXSIZE*2);
  char* mainpath = (char*) malloct(256);
  char* projname = (char*) malloct(256);
  char* stropt = (char*) malloct(2048);     // short options
  char* stropt2 = (char*) malloct(2048);    // long options
  char* strwild = (char*) malloct(2048);    // wildcards
  char* cmd = (char*) malloct(4096);
  char* str = (char*) malloct(256);
  char** argv = (char**) malloct(256 * sizeof(char*));
  //
  char* a;
  //
  if (urls == NULL || mainpath == NULL || projname == NULL || stropt == NULL
    || stropt2 == NULL || strwild == NULL || cmd == NULL || str == NULL
    || argv == NULL) {
    fprintf(stderr, "* memory exhausted in %s, line %d\n", __FILE__, __LINE__);
    goto cleanup;   // release whatever could be allocated
  }
  urls[0] = mainpath[0] = projname[0] = stropt[0] = stropt2[0] = strwild[0] = cmd[0] = str[0] = '\0';
  //
  strcpybuff(stropt,"-");
  mainpath[0]=projname[0]=stropt2[0]=strwild[0]='\0';
  //

  printf("\n");
  printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"%s\n", hts_get_version_info(opt));
  printf("Copyright (C) Xavier Roche and other contributors\n");
#ifdef _WIN32
  printf("Note: You are running the commandline version,\n");
  printf("run 'WinHTTrack.exe' to get the GUI version.\n");
#endif
#ifdef HTTRACK_AFF_WARNING
  printf("NOTE: "HTTRACK_AFF_WARNING"\n");
#endif
#ifdef HTS_PLATFORM_NAME
#if USE_BEGINTHREAD
  printf("[compiled: "HTS_PLATFORM_NAME" - MT]\n");
#else
  printf("[compiled: "HTS_PLATFORM_NAME"]\n");
#endif
#endif
  printf("To see the option list, enter a blank line or try httrack --help\n");
  //
  // Project name (asked again until non-empty)
  while(strnotempty(projname)==0) {
    printf("\n");
    printf("Enter project name :");
    fflush(stdout);
    linput(stdin,projname,250);
    if (strnotempty(projname)==0)
      help("httrack",1);
  }
  //
  // Path
  if (strnotempty(hts_gethome()))
    printf("\nBase path (return=%s/websites/) :",hts_gethome());
  else
    printf("\nBase path (return=current directory) :");
  linput(stdin,str,250);
  if (!strnotempty(str)) {
    strcatbuff(str,hts_gethome());
    strcatbuff(str,"/websites/");
  }
  // make sure a non-empty path ends with a separator
  if (strnotempty(str))
    if ((str[strlen(str)-1]!='/') && (str[strlen(str)-1]!='\\'))
      strcatbuff(str,"/");
  strcatbuff(stropt2,"-O \"");
  strcatbuff(stropt2,str);
  strcatbuff(stropt2,projname);
  strcatbuff(stropt2,"\" ");
  // Create a first-level index.html if it does not exist yet
  make_empty_index(str);
  //
  printf("\n");
  printf("Enter URLs (separated by commas or blank spaces) :");
  fflush(stdout);
  linput(stdin,urls,250);
  if (strnotempty(urls)) {
    // commas and tabs are separators: normalize them to spaces
    while( (a=strchr(urls,',')) ) *a=' ';
    while( (a=strchr(urls,'\t')) ) *a=' ';

    // Action
    printf("\nAction:\n");
    switch(help_query("Mirror Web Site(s)|Mirror Web Site(s) with Wizard|Just Get Files Indicated|Mirror ALL links in URLs (Multiple Mirror)|Test Links In URLs (Bookmark Test)|Update/Continue a Mirror",1)) {
      case 1:
        break;
      case 2:
        strcatbuff(stropt,"W");
        break;
      case 3:
        strcatbuff(stropt2,"--get ");
        break;
      case 4:
        strcatbuff(stropt2,"--mirrorlinks ");
        break;
      case 5:
        strcatbuff(stropt2,"--testlinks ");
        break;
      case 6:
        strcatbuff(stropt2,"--update ");
        break;
      case 0:
        goto cleanup;   // wizard abandoned
    }

    // Proxy
    printf("\nProxy (return=none) :");
    linput(stdin,str,250);
    if (strnotempty(str)) {
      while( (a=strchr(str,' ')) ) *a=':';    // "host port" -> "host:port"
      if (!strchr(jump_identification(str),':')) {
        // no port given: ask for one
        char str2[256];
        printf("\nProxy port (return=8080) :");
        linput(stdin,str2,250);
        strcatbuff(str,":");
        if (strnotempty(str2)==0)
          strcatbuff(str,"8080");
        else
          strcatbuff(str,str2);
      }
      strcatbuff(stropt2,"-P ");
      strcatbuff(stropt2,str);
      strcatbuff(stropt2," ");
    }

    // Display
    strcatbuff(stropt2," -%v ");

    // Wildcards
    printf("\nYou can define wildcards, like: -*.gif +www.*.com/*.zip -*img_*.zip\n");
    printf("Wildcards (return=none) :");
    linput(stdin,strwild,250);

    // Options (typing "help" shows the option list and asks again)
    do {
      printf("\nYou can define additional options, such as recurse level (-r<number>), separed by blank spaces\n");
      printf("To see the option list, type help\n");
      printf("Additional options (return=none) :");
      linput(stdin,str,250);
      if (strfield2(str,"help")) {
        help("httrack",2);
      } else if (strnotempty(str)) {
        strcatbuff(stropt2,str);
        strcatbuff(stropt2," ");
      }
    } while(strfield2(str,"help"));

    {
      int argc=1;
      int g=0;      // 1 while inside a "quoted" section
      int i=0;
      //
      printf("\n");
      if (strlen(stropt)==1)
        stropt[0]='\0';     // only the leading '-': no short option
      sprintf(cmd,"%s %s %s %s",urls,stropt,stropt2,strwild);
      printf("---> Wizard command line: httrack %s\n\n",cmd);
      printf("Ready to launch the mirror? (Y/n) :");
      fflush(stdout);
      linput(stdin,str,250);
      if (strnotempty(str)) {
        if (!((str[0]=='y') || (str[0]=='Y')))
          goto cleanup;     // launch declined
      }
      printf("\n");

      // Split in place: every space outside double quotes ends an argument
      // and starts the next one. argv holds 256 entries; arguments beyond
      // that are dropped rather than written past the array.
      argv[0]="winhttrack";
      argv[1]=cmd;
      argc++;
      while(cmd[i]) {
        if(cmd[i]=='\"') g=!g;
        if(cmd[i]==' ') {
          if(!g) {
            cmd[i]='\0';
            if (argc < 256)
              argv[argc++]=cmd+i+1;
          }
        }
        i++;
      }
      hts_main(argc,argv);
    }
    //} else {
    //  help("httrack",1);
  }

cleanup:
  /* Free buffers (some may be NULL after an allocation failure) */
  if (urls != NULL) { freet(urls); }
  if (mainpath != NULL) { freet(mainpath); }
  if (projname != NULL) { freet(projname); }
  if (stropt != NULL) { freet(stropt); }
  if (stropt2 != NULL) { freet(stropt2); }
  if (strwild != NULL) { freet(strwild); }
  if (cmd != NULL) { freet(cmd); }
  if (str != NULL) { freet(str); }
  if (argv != NULL) { freet(argv); }
}
// la véritable fonction une fois lancées les routines thread/fork int run_launch_ftp(FTPDownloadStruct * pStruct) { lien_back *back = pStruct->pBack; httrackp *opt = pStruct->pOpt; char user[256] = "anonymous"; char pass[256] = "user@"; char line_retr[2048]; int port = 21; #if FTP_PASV int port_pasv = 0; #endif char BIGSTK adr_ip[1024]; char *adr, *real_adr; char *ftp_filename = ""; int timeout = 300; // timeout int timeout_onfly = 8; // attente réponse supplémentaire int transfer_list = 0; // directory int rest_understood = 0; // rest command understood t_fullhostent fullhostent_buffer; // buffer pour resolver // T_SOC soc_ctl = INVALID_SOCKET; T_SOC soc_servdat = INVALID_SOCKET; T_SOC soc_dat = INVALID_SOCKET; // SOCaddr server_data; int server_data_size = sizeof(server_data); // line_retr[0] = adr_ip[0] = '\0'; timeout = 300; // effacer strcpybuff(back->r.msg, ""); back->r.statuscode = 0; back->r.size = 0; // récupérer user et pass si présents, et sauter user:id@ dans adr real_adr = strchr(back->url_adr, ':'); if (real_adr) real_adr++; else real_adr = back->url_adr; while(*real_adr == '/') real_adr++; // sauter / if ((adr = jump_identification(real_adr)) != real_adr) { // user int i = -1; pass[0] = '\0'; do { i++; user[i] = real_adr[i]; } while((real_adr[i] != ':') && (real_adr[i])); user[i] = '\0'; if (real_adr[i] == ':') { // pass int j = -1; i++; // oui on saute aussi le : do { j++; pass[j] = real_adr[i + j]; } while(((&real_adr[i + j + 1]) < adr) && (real_adr[i + j])); pass[j] = '\0'; } } // Calculer RETR <nom> { char *a; #if 0 a = back->url_fil + strlen(back->url_fil) - 1; while((a > back->url_fil) && (*a != '/')) a--; if (*a != '/') { a = NULL; } #else a = back->url_fil; #endif if (a != NULL && *a != '\0') { #if 0 a++; // sauter / #endif ftp_filename = a; if (strnotempty(a)) { char catbuff[CATBUFF_SIZE]; char *ua = unescape_http(catbuff, a); int len_a = (int) strlen(ua); if (len_a > 0 && ua[len_a - 1] == '/') { /* obviously a directory listing */ 
transfer_list = 1; snprintf(line_retr, sizeof(line_retr), "LIST -A %s", ua); } else if ((strchr(ua, ' ')) || (strchr(ua, '\"')) || (strchr(ua, '\'')) ) { snprintf(line_retr, sizeof(line_retr), "RETR \"%s\"", ua); } else { /* Regular one */ snprintf(line_retr, sizeof(line_retr), "RETR %s", ua); } } else { transfer_list = 1; snprintf(line_retr, sizeof(line_retr), "LIST -A"); } } else { strcpybuff(back->r.msg, "Unexpected PORT error"); // back->status=STATUS_FTP_READY; // fini back->r.statuscode = STATUSCODE_INVALID; } } #if FTP_DEBUG printf("Connecting to %s...\n", adr); #endif // connexion { SOCaddr server; int server_size = sizeof(server); t_hostent *hp; char *a; char _adr[256]; const char *error = "unknown error"; _adr[0] = '\0'; //T_SOC soc_ctl; // effacer structure memset(&server, 0, sizeof(server)); // port a = strchr(adr, ':'); // port if (a) { sscanf(a + 1, "%d", &port); strncatbuff(_adr, adr, (int) (a - adr)); } else strcpybuff(_adr, adr); // récupérer adresse résolue strcpybuff(back->info, "host name"); hp = hts_gethostbyname2(opt, _adr, &fullhostent_buffer, &error); if (hp == NULL) { snprintf(back->r.msg, sizeof(back->r.msg), "Unable to get server's address: %s", error); // back->status=STATUS_FTP_READY; // fini back->r.statuscode = STATUSCODE_NON_FATAL; _HALT_FTP return 0; } _CHECK_HALT_FTP; // copie adresse SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length); // copie adresse pour cnx data SOCaddr_copyaddr(server_data, server_data_size, hp->h_addr_list[0], hp->h_length); // memcpy(&server.sin_addr, hp->h_addr, hp->h_length); // créer ("attachement") une socket (point d'accès) internet,en flot soc_ctl = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); if (soc_ctl == INVALID_SOCKET) { strcpybuff(back->r.msg, "Unable to create a socket"); // back->status=STATUS_FTP_READY; // fini back->r.statuscode = STATUSCODE_INVALID; _HALT_FTP return 0; }
void LANG_LOAD(char* limit_to) { CWaitCursor wait; // extern int NewLangStrSz; extern coucal NewLangStr; extern int NewLangStrKeysSz; extern coucal NewLangStrKeys; // int selected_lang=LANG_T(-1); // if (!limit_to) { LANG_DELETE(); NewLangStr=coucal_new(NewLangStrSz); NewLangStrKeys=coucal_new(NewLangStrKeysSz); if ((NewLangStr==NULL) || (NewLangStrKeys==NULL)) { AfxMessageBox("Error in lang.h: not enough memory"); } else { coucal_value_is_malloc(NewLangStr,1); coucal_value_is_malloc(NewLangStrKeys,1); } } TCHAR ModulePath[MAX_PATH + 1]; ModulePath[0] = '\0'; ::GetModuleFileName(NULL, ModulePath, sizeof(ModulePath)/sizeof(TCHAR) - 1); TCHAR* pos = _tcsrchr(ModulePath, '\\'); if (pos != NULL) { * ( pos + 1) = '\0'; } else { ModulePath[0] = '\0'; } /* Load master file (list of keys and internal keys) */ CString app = ModulePath; if (!limit_to) { CString mname=app+"lang.def"; if (!fexist((char*)LPCTSTR(mname))) mname="lang.def"; FILE* fp=fopen(mname,"rb"); if (fp) { char intkey[8192]; char key[8192]; while(!feof(fp)) { linput_cpp(fp,intkey,8000); linput_cpp(fp,key,8000); if (strnotempty(intkey) && strnotempty(key)) { char* test=LANGINTKEY(key); /* Increment for multiple definitions */ if (strnotempty(test)) { int increment=0; size_t pos = strlen(key); do { increment++; sprintf(key+pos,"%d",increment); test=LANGINTKEY(key); } while (strnotempty(test)); } if (!strnotempty(test)) { // éviter doublons // conv_printf(key,key); size_t len; char* buff; len=strlen(intkey); buff=(char*)malloc(len+2); if (buff) { strcpybuff(buff,intkey); coucal_add(NewLangStrKeys,key,(intptr_t)buff); } } } // if } // while fclose(fp); } else { AfxMessageBox("FATAL ERROR\r\n'lang.def' file NOT FOUND!\r\nEnsure that the installation was complete!"); exit(0); } } /* Language Name? 
*/ char* hashname; { char name[256]; sprintf(name,"LANGUAGE_%d",selected_lang+1); hashname=LANGINTKEY(name); } /* Get only language name */ if (limit_to) { if (hashname) strcpybuff(limit_to,hashname); else strcpybuff(limit_to,"???"); return; } /* Error */ if (!hashname) return; // xxc TEST /* setlocale( LC_ALL, "Japanese"); _setmbcp(932); // shift-jis setlocale( LC_ALL, ".932" ); setlocale( LC_ALL, "[.932]" ); CString st=""; int lid=SetThreadLocale(MAKELCID(MAKELANGID(LANG_JAPANESE,SUBLANG_NEUTRAL),SORT_DEFAULT )); */ /* Load specific language file */ { int loops; CString err_msg=""; // 2nd loop: load undefined strings for(loops=0;loops<2;loops++) { CString lbasename; { char name[256]; sprintf(name,"LANGUAGE_%d",(loops==0)?(selected_lang+1):1); hashname=LANGINTKEY(name); } lbasename.Format("lang/%s.txt",hashname); CString lname=app+lbasename; if (!fexist((char*)LPCTSTR(lname))) lname=lbasename; FILE* fp=fopen(lname,"rb"); if (fp) { char extkey[8192]; TCHAR value[8192]; while(!feof(fp)) { //int ssz; linput_cpp(fp,extkey,8000); linput_cpp(fp,value,8000); /* ssz=linput_cpp(fp,value,8000); CString st=value; AfxMessageBox(st); if (ssz>0) { int tst=0; int test=IsTextUnicode(value,ssz,&tst); unsigned short st2[1024]; int ret=MultiByteToWideChar(CP_UTF8,0,(char*)value,ssz,st2,1024); if (ret>0) { char st3[1024]=""; int ret2=WideCharToMultiByte(CP_THREAD_ACP,0,st2,ret,(char*)st3,1024,NULL,FALSE); if (ret2>0) { AfxMessageBox(st3); } } } */ if (strnotempty(extkey) && strnotempty(value)) { int len; char* buff; char* intkey; intkey=LANGINTKEY(extkey); if (strnotempty(intkey)) { /* Increment for multiple definitions */ { char* test=LANGSEL(intkey); if (strnotempty(test)) { if (loops == 0) { int increment=0; size_t pos=strlen(extkey); do { increment++; sprintf(extkey+pos,"%d",increment); intkey=LANGINTKEY(extkey); if (strnotempty(intkey)) test=LANGSEL(intkey); else test=""; } while (strnotempty(test)); } else intkey=""; } else { if (loops > 0) { err_msg += intkey; err_msg += " "; 
} } } /* Add key */ if (strnotempty(intkey)) { len = (int) strlen(value); buff = (char*)malloc(len+2); if (buff) { conv_printf(value,buff); coucal_add(NewLangStr,intkey,(intptr_t)buff); } } } } // if } // while fclose(fp); } else { AfxMessageBox("FATAL ERROR\r\n'lang.def' file NOT FOUND!\r\nEnsure that the installation was complete!"); exit(0); } } if (err_msg.GetLength()>0) { // AfxMessageBox("Error: undefined strings follows:\r\n"+err_msg); } } #if 0 app=app+"lang.h"; if (!fexist((char*)LPCTSTR(app))) app="lang.h"; FILE* fp=fopen(app,"rb"); if (fp) { char s[8192]; while(!feof(fp)) { linput_cpp(fp,s,8000); if (!strncmp(s,"#define ",8)) { char* a; char* name=s+8; a=name; while((*a!=' ') && (*a)) a++; if ((*a) && (strlen(name)>0) && (((int) a - (int) name)<64)) { *a++='\0'; if (limit_to) { if (strcmp(name,limit_to)) a=NULL; } if (a) { char* data; data=a; int toggle=0; char* start_str=NULL; int count=0; while(*a) { if (*a=='\"') { toggle++; if ((toggle%2)==1) { if (count==selected_lang) { start_str=a+1; } count++; } else { if (start_str) { char* buff; int len; len=(int) a - (int) start_str; if (len) { buff=(char*)malloc(len+2); if (buff) { int i=0,j=0; buff[0]='\0'; //strncatbuff(buff,start_str,len); while(i<len) { switch(start_str[i]) { case '\\': i++; switch(start_str[i]) { case 'a': buff[j]='\a'; break; case 'b': buff[j]='\b'; break; case 'f': buff[j]='\f'; break; case 'n': buff[j]='\n'; break; case 'r': buff[j]='\r'; break; case 't': buff[j]='\t'; break; case 'v': buff[j]='\v'; break; case '\'': buff[j]='\''; break; case '\"': buff[j]='\"'; break; case '\\': buff[j]='\\'; break; case '?': buff[j]='\?'; break; default: buff[j]=start_str[i]; break; } break; default: buff[j]=start_str[i]; break; } i++; j++; } buff[j++]='\0'; if (!limit_to) coucal_add(NewLangStr,name,(intptr_t)buff); else { strcpybuff(limit_to,buff); free(buff); return; } } } start_str=NULL; } } } a++; } } //NewLangStr.SetAt(sname,st); /* } else { CString info; info.Format("Error in lang.h: 
%s",name); AfxMessageBox(info); */ } } } fclose(fp); } else { AfxMessageBox("FATAL ERROR\r\n'lang.h' file NOT FOUND!\r\nEnsure that the installation was complete!"); exit(0); } #endif // Control limit_to if (limit_to) limit_to[0]='\0'; // Set locale if (!limit_to) { CString charset = LANGUAGE_CHARSET; charset.TrimLeft(); charset.TrimRight(); charset.MakeLower(); NewLangCP = CP_THREAD_ACP; NewLangFileCP = CP_THREAD_ACP; #if 0 if (charset.GetLength() > 0) { if (charset.Left(9) == "iso-8859-") { int iso = 0; int isoCP[] = {0, /* 0 */ 1252, /* ISO-8859-1 */ 1250, /* ISO-8859-2 */ 0, /* ISO-8859-3 */ 0, /* ISO-8859-4 */ 1251, /* ISO-8859-5 */ 1256, /* ISO-8859-6 */ 1253, /* ISO-8859-7 */ 1255, /* ISO-8859-8 */ 1254, /* ISO-8859-9 */ }; if (sscanf(charset.GetBuffer(0) + 9, "%d", &iso) == 1) { if (iso < sizeof(isoCP)/sizeof(isoCP[0])) { if (isoCP[iso] != 0) { NewLangFileCP = isoCP[iso]; } } } } else if (charset.Left(8) == "windows-") { int windows = 0; if (sscanf(charset.GetBuffer(0) + 8, "%d", &windows) == 1) { NewLangFileCP = windows; } } else if (charset == "shift-jis") { NewLangFileCP = 932; } else if (charset == "big5") { NewLangFileCP = 950; } else if (charset == "gb2312") { NewLangFileCP = 936; } else { NewLangFileCP = CP_THREAD_ACP; } } WORD acp = GetACP(); if (NewLangFileCP != CP_THREAD_ACP && NewLangFileCP != acp) { char* currName = LANGUAGE_WINDOWSID; LCID thl = GetThreadLocale(); WORD sid = SORTIDFROMLCID(thl); WORD lid = 0; WinLangid* lids; if (currName[0]) { for( lids = (WinLangid*)&WINDOWS_LANGID ; lids->name != NULL ; lids++ ) { if (strcmp(currName, lids->name) == 0) { lid = lids->langId; break; } } if (lid != 0) { SetThreadLocale(MAKELCID(lid, sid)); } } } #endif } }
/*
  Indexing system
  A little bit dirty, (quick'n dirty, in fact)
  But should be okay on most cases
  Tags and javascript handled (ignored)

  Scans html_data[0..size-1] and collects the words found outside tags,
  comments and scripts. Every distinct word of the page is written to a
  temporary file and counted in a hash table; if fp_tmpproject is open, one
  line "<word> <KEYW_SORT_MAXCOUNT - count> <file>" is then appended to it per
  distinct word (<file> is filename with the indexpath prefix cut when
  present).

  mime selects the initial state: html-like types start outside a script,
  javascript/css start inside one (so nothing is indexed until a </script
  is seen), any other type is not indexed at all.

  Returns 0 when a parameter is missing, the MIME type is not handled, or the
  temporary file / hash table cannot be created; 1 otherwise (always 1 when
  HTS_MAKE_KEYWORD_INDEX is disabled).
*/
int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath) {
#if HTS_MAKE_KEYWORD_INDEX
  char catbuff[CATBUFF_SIZE];
  int intag=0,inscript=0,incomment=0;
  char keyword[KEYW_LEN+32];
  LLint i=0;                  /* same width as size, so large inputs cannot overflow the index */
  int WordIndexSize=1024;
  inthash WordIndexHash=NULL;
  FILE *tmpfp=NULL;

  // Check parameters
  if (!html_data)
    return 0;
  if (!size)
    return 0;
  if (!mime)
    return 0;
  if (!filename)
    return 0;

  // First call: remove the index files left in indexpath
  if (hts_index_init) {
    remove(concat(catbuff,indexpath,"index.txt"));
    remove(concat(catbuff,indexpath,"sindex.html"));
    hts_index_init=0;
  }

  // Check MIME type
  if (is_html_mime_type(mime)) {
    inscript=0;
  }
  // FIXME - temporary fix for image/svg+xml (svg)
  // "IN XML" (html like, in fact :) )
  else if (
    (strfield2(mime,"image/svg+xml"))
    || (strfield2(mime,"image/svg-xml"))
#if HTS_USEMMS
    || strfield2(mime,"video/x-ms-asf")
#endif
    ) {
    inscript=0;
  }
  else if (
    (strfield2(mime,"application/x-javascript"))
    || (strfield2(mime,"text/css"))
    ) {
    inscript=1;
  } else
    return 0;

  // Temporary file (keeps the distinct words in order of first appearance)
  tmpfp = tmpfile();
  if (!tmpfp)
    return 0;

  // Create hash structure
  WordIndexHash=inthash_new(WordIndexSize);
  if (!WordIndexHash) {
    fclose(tmpfp);            /* do not leak the temporary file */
    return 0;
  }

  // Start indexing this page
  keyword[0]='\0';
  while(i<size) {
    if (strfield(html_data + i , "<script")) {
      inscript=1;
    }
    else if (strfield(html_data + i , "<!--")) {
      incomment=1;
    }
    else if (strfield(html_data + i , "</script")) {
      if (!incomment)
        inscript=0;
    }
    else if (strfield(html_data + i , "-->")) {
      incomment=0;
    }
    else if (html_data[i]=='<') {
      if (!inscript)
        intag=1;
    }
    else if (html_data[i]=='>') {
      intag=0;
    }
    else {
      // Okay, parse keywords
      if ( (!inscript) && (!incomment) && (!intag) ) {
        char cchar=html_data[i];
        int pos;
        int len = (int) strlen(keyword);

        // Replace (ignore case, and so on..)
        // A NUL byte is never transcoded: it is handled as a separator below
        if (cchar != '\0' && (pos=strcpos(KEYW_TRANSCODE_FROM,cchar))>=0)
          cchar=KEYW_TRANSCODE_TO[pos];

        // strchr() also matches the terminating NUL of its first argument,
        // so NUL must be excluded explicitly or the separator branch below
        // could never be reached for it
        if (cchar != '\0' && strchr(KEYW_ACCEPT,cchar)) {
          /* Ignore some characters at beginning */
          if ((len>0) || (!strchr(KEYW_IGNORE_BEG,cchar))) {
            keyword[len++]=cchar;
            keyword[len]='\0';
          }
        } else if ( (strchr(KEYW_SPACE,cchar)) || (!cchar) ) {

          /* Avoid these words */
          if (len>0) {
            if (strchr(KEYW_NOT_BEG,keyword[0])) {
              keyword[(len=0)]='\0';
            }
          }

          /* Strip ending . and so */
          {
            int ok=0;
            while((len = (int) strlen(keyword)) && (!ok)) {
              if (strchr(KEYW_STRIP_END,keyword[len-1])) {  /* strip it */
                keyword[len-1]='\0';
              } else
                ok=1;
            }
          }

          /* Store it ? */
          if (len >= KEYW_MIN_LEN ) {
            hts_primindex_words++;
            if (inthash_inc(WordIndexHash,keyword)) {   /* added new */
              fprintf(tmpfp,"%s\n",keyword);
            }
          }
          keyword[(len=0)]='\0';
        } else      /* Invalid */
          keyword[(len=0)]='\0';

        /* Too long to be a keyword: drop it */
        if (len>KEYW_LEN) {
          keyword[(len=0)]='\0';
        }
      }

    }

    i++;
  }

  // Reset temp file
  fseek(tmpfp,0,SEEK_SET);

  // Process indexing for this page
  {
    if (fp_tmpproject) {
      while(!feof(tmpfp)) {
        char line[KEYW_LEN + 32];
        linput(tmpfp,line,KEYW_LEN + 2);
        if (strnotempty(line)) {
          intptr_t e=0;
          if (inthash_read(WordIndexHash,line,&e)) {
            char BIGSTK savelst[HTS_URLMAXSIZE*2];
            e++;    /* 0 means "once" */

            if (strncmp((const char*)fslash(catbuff,(char*)indexpath),filename,strlen(indexpath))==0)  // cut the indexpath prefix
              strcpybuff(savelst,filename+strlen(indexpath));
            else
              strcpybuff(savelst,filename);

            // Add entry for this file and word
            fprintf(fp_tmpproject,"%s %d %s\n",line,(int) (KEYW_SORT_MAXCOUNT - e),savelst);
            hts_primindex_size++;
          }
        }
      }
    }
  }

  // Delete temp file
  fclose(tmpfp);
  tmpfp=NULL;

  // Clear hash table
  inthash_delete(&WordIndexHash);
#endif
  return 1;
}
/*
 * Callback called at each HTTrack loop: publishes the current statistics to
 * the small web server through smallserver_setkey*().
 *
 * The "info.*" counters are refreshed on every call. Negative inputs mean
 * "unknown" and leave the previously published value untouched, except
 * info.stat_nsocket, which is always overwritten. The per-transfer arrays
 * ("info.state[i]", "info.name[i]", ...) and "info.currentjob" are refreshed
 * only when more than 100 time units (as returned by mtime_local()) elapsed
 * since the previous refresh, or the clock went backwards, and only if
 * back_index >= 0 and back_max > 0.
 *
 * Returns 0 when commandEndRequested == 2 (nothing is published in that
 * case), 1 otherwise.
 */
int __cdecl htsshow_loop(t_hts_callbackarg *carg, httrackp *opt, lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) {
  static TStamp prev_mytime=0; /* ok */
  static t_InpInfo SInfo; /* ok */
  TStamp mytime;
  long int rate=0;
  int stat_written=-1;
  int stat_updated=-1;
  int stat_errors=-1;
  int stat_warnings=-1;
  int stat_infos=-1;
  int nbk=-1;
  int stat_nsocket=-1;
  LLint stat_bytes=-1;
  LLint stat_bytes_recv=-1;
  int irate=-1;
  char st[256];

  /* Exit now */
  if (commandEndRequested == 2)
    return 0;

  /* Lock */
  webhttrack_lock();

  if (stats) {
    stat_written=stats->stat_files;
    stat_updated=stats->stat_updated_files;
    stat_errors=stats->stat_errors;
    stat_warnings=stats->stat_warnings;
    stat_infos=stats->stat_infos;
    nbk=stats->nbk;
    stat_nsocket=stats->stat_nsocket;
    irate=(int)stats->rate;
    stat_bytes=stats->nb;
    stat_bytes_recv=stats->HTS_TOTAL_RECV;
  }

  mytime=mtime_local();
  if ((stat_time>0) && (stat_bytes_recv>0))
    rate=(int)(stat_bytes_recv/stat_time);
  else
    rate=0;    // no information

  /* Infos */
  if (stat_bytes>=0) SInfo.stat_bytes=stat_bytes;      // bytes
  if (stat_time>=0) SInfo.stat_time=stat_time;         // time
  if (lien_tot>=0) SInfo.lien_tot=lien_tot;            // number of links
  if (lien_n>=0) SInfo.lien_n=lien_n;                  // scanned
  SInfo.stat_nsocket=stat_nsocket;                     // socks
  if (rate>0)  SInfo.rate=rate;                        // rate
  if (irate>=0) SInfo.irate=irate;                     // irate
  if (SInfo.irate<0) SInfo.irate=SInfo.rate;
  if (nbk>=0) SInfo.stat_back=nbk;
  if (stat_written>=0) SInfo.stat_written=stat_written;
  if (stat_updated>=0) SInfo.stat_updated=stat_updated;
  if (stat_errors>=0)  SInfo.stat_errors=stat_errors;
  if (stat_warnings>=0)  SInfo.stat_warnings=stat_warnings;
  if (stat_infos>=0)  SInfo.stat_infos=stat_infos;

  st[0]='\0';
  qsec2str(st,stat_time);

  /* Set keys */
  smallserver_setkeyint("info.stat_bytes", SInfo.stat_bytes);
  smallserver_setkeyint("info.stat_time", SInfo.stat_time);
  smallserver_setkeyint("info.lien_tot", SInfo.lien_tot);
  smallserver_setkeyint("info.lien_n", SInfo.lien_n);
  smallserver_setkeyint("info.stat_nsocket", SInfo.stat_nsocket);
  smallserver_setkeyint("info.rate", SInfo.rate);
  smallserver_setkeyint("info.irate", SInfo.irate);
  smallserver_setkeyint("info.stat_back", SInfo.stat_back);
  smallserver_setkeyint("info.stat_written", SInfo.stat_written);
  smallserver_setkeyint("info.stat_updated", SInfo.stat_updated);
  smallserver_setkeyint("info.stat_errors", SInfo.stat_errors);
  smallserver_setkeyint("info.stat_warnings", SInfo.stat_warnings);
  smallserver_setkeyint("info.stat_infos", SInfo.stat_infos);
  /* */
  smallserver_setkey("info.stat_time_str", st);

  if ( ((mytime - prev_mytime)>100) || ((mytime - prev_mytime)<0) ) {
    prev_mytime=mytime;

    // walk through the link (transfer slot) registry
    if (back_index>=0 && back_max > 0) {  // only if an index was given
      int j,k;
      int index=0;
      int ok=0;
      int l;
      t_StatsBuffer StatsBuffer[NStatsBuffer];

      /* Clear every field of every entry. url_adr and url_fil are published
         below but never filled in by this function, so they must not be left
         holding indeterminate stack contents. */
      memset(StatsBuffer, 0, sizeof(StatsBuffer));

      for(k=0;k<2;k++) {    // 0: current link 1: other links
        for(j=0;(j<3) && (index<NStatsBuffer);j++) {  // priority pass
          int _i;
          for(_i=0+k;(_i< max(back_max*k,1) ) && (index<NStatsBuffer);_i++) {  // link number
            int i=(back_index+_i)%back_max;    // start with the "first" one (the current one)
            if (back[i].status>=0) {     // means "active link"
              ok=0;
              switch(j) {
              case 0:     // highest priority: transfers in progress
                if ((back[i].status>0) && (back[i].status<99)) {
                  strcpybuff(StatsBuffer[index].state,"receive");
                  ok=1;
                }
                break;
              case 1:
                if (back[i].status==STATUS_WAIT_HEADERS) {
                  strcpybuff(StatsBuffer[index].state,"request");
                  ok=1;
                }
                else if (back[i].status==STATUS_CONNECTING) {
                  strcpybuff(StatsBuffer[index].state,"connect");
                  ok=1;
                }
                else if (back[i].status==STATUS_WAIT_DNS) {
                  strcpybuff(StatsBuffer[index].state,"search");
                  ok=1;
                }
                else if (back[i].status==STATUS_FTP_TRANSFER) {
                  /* Use the URL scheme as label when it is at most 3
                     characters long, "ftp" otherwise */
                  char proto[] = "ftp";
                  if (back[i].url_adr[0]) {
                    char* ep = strchr(back[i].url_adr, ':');
                    char* eps = strchr(back[i].url_adr, '/');
                    int count;
                    if (ep != NULL && ep < eps && (count = (int) (ep - back[i].url_adr) ) < 4) {
                      proto[0] = '\0';
                      strncat(proto, back[i].url_adr, count);
                    }
                  }
                  /* NOTE(review): unbounded write; relies on state being
                     large enough for proto + ": " + info -- confirm against
                     the t_StatsBuffer / lien_back declarations */
                  sprintf(StatsBuffer[index].state,"%s: %s",proto,back[i].info);
                  ok=1;
                }
                break;
              default:
                if (back[i].status==STATUS_READY) {  // ready
                  if ((back[i].r.statuscode==HTTP_OK)) {
                    strcpybuff(StatsBuffer[index].state,"ready");
                    ok=1;
                  }
                  else if ((back[i].r.statuscode>=100) && (back[i].r.statuscode<=599)) {
                    char tempo[256];
                    tempo[0]='\0';
                    infostatuscode(tempo,back[i].r.statuscode);
                    strcpybuff(StatsBuffer[index].state,tempo);
                    ok=1;
                  }
                  else {
                    strcpybuff(StatsBuffer[index].state,"error");
                    ok=1;
                  }
                }
                break;
              }

              if (ok) {
                char s[HTS_URLMAXSIZE*2];
                //
                StatsBuffer[index].back=i;        // slot index, for more details
                //
                s[0]='\0';
                strcpybuff(StatsBuffer[index].url_sav,back[i].url_sav);   // for cancel
                if (strcmp(back[i].url_adr,"file://"))
                  strcatbuff(s,back[i].url_adr);
                else
                  strcatbuff(s,"localhost");
                if (back[i].url_fil[0]!='/')
                  strcatbuff(s,"/");
                strcatbuff(s,back[i].url_fil);

                /* Split at the last '/': the tail goes to .file, the rest
                   (possibly shortened) to .name */
                StatsBuffer[index].file[0]='\0';
                {
                  char* a=strrchr(s,'/');
                  if (a) {
                    strncatbuff(StatsBuffer[index].file,a,200);
                    *a='\0';
                  }
                }

                if ((l = (int) strlen(s))<MAX_LEN_INPROGRESS)
                  strcpybuff(StatsBuffer[index].name,s);
                else {
                  // too long: keep both ends, with "..." in between
                  StatsBuffer[index].name[0]='\0';
                  strncatbuff(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2);
                  strcatbuff(StatsBuffer[index].name,"...");
                  strcatbuff(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2);
                }

                if (back[i].r.totalsize>0) {  // known total size
                  StatsBuffer[index].sizetot=back[i].r.totalsize;
                  StatsBuffer[index].size=back[i].r.size;
                } else {  // unknown total size
                  if (back[i].status==STATUS_READY) {  // ready
                    StatsBuffer[index].sizetot=back[i].r.size;
                    StatsBuffer[index].size=back[i].r.size;
                  } else {
                    StatsBuffer[index].sizetot=8192;
                    StatsBuffer[index].size=(back[i].r.size % 8192);
                  }
                }
                index++;
              }
            }
          }
        }
      }

      /* Display current job */
      {
        int parsing=0;
        if (commandEndRequested)
          smallserver_setkey("info.currentjob", "finishing pending transfers - Select [Cancel] to stop now!");
        else if (!(parsing=hts_is_parsing(opt, -1)))
          smallserver_setkey("info.currentjob", "receiving files");
        else {
          char tmp[1024];
          tmp[0] = '\0';
          switch(hts_is_testing(opt)) {
          case 0:
            sprintf(tmp, "parsing HTML file (%d%%)",parsing);
            break;
          case 1:
            sprintf(tmp, "parsing HTML file: testing links (%d%%)",parsing);
            break;
          case 2:
            sprintf(tmp, "purging files");
            break;
          case 3:
            sprintf(tmp, "loading cache");
            break;
          case 4:
            sprintf(tmp, "waiting (scheduler)");
            break;
          case 5:
            sprintf(tmp, "waiting (throttle)");
            break;
          }
          smallserver_setkey("info.currentjob", tmp);
        }
      }

      /* Display background jobs */
      {
        int i;
        for(i=0;i<NStatsBuffer;i++) {
          if (strnotempty(StatsBuffer[i].state)) {
            strc_int2bytes2 strc;
            smallserver_setkeyarr("info.state[", i, "]", StatsBuffer[i].state);
            smallserver_setkeyarr("info.name[", i, "]", StatsBuffer[i].name);
            smallserver_setkeyarr("info.file[", i, "]", StatsBuffer[i].file);
            smallserver_setkeyarr("info.size[", i, "]", int2bytes(&strc,StatsBuffer[i].size));
            smallserver_setkeyarr("info.sizetot[", i, "]", int2bytes(&strc,StatsBuffer[i].sizetot));
            /* NOTE(review): url_adr / url_fil are only ever cleared, never
               filled from back[] here -- confirm whether that is intended */
            smallserver_setkeyarr("info.url_adr[", i, "]", StatsBuffer[i].url_adr);
            smallserver_setkeyarr("info.url_fil[", i, "]", StatsBuffer[i].url_fil);
            smallserver_setkeyarr("info.url_sav[", i, "]", StatsBuffer[i].url_sav);
          }
        }
      }

    }

  }

  /* UnLock */
  webhttrack_release();

  return 1;
}
static int hts_acceptlink_(httrackp* opt, int ptr,int lien_tot,lien_url** liens, char* adr,char* fil, char* tag, char* attribute, int* set_prio_to, int* just_test_it) { int forbidden_url=-1; int meme_adresse; int embedded_triggered = 0; #define _FILTERS (*opt->filters.filters) #define _FILTERS_PTR (opt->filters.filptr) #define _ROBOTS ((robots_wizard*)opt->robotsptr) int may_set_prio_to=0; // -------------------- PHASE 0 -------------------- /* Infos */ if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"wizard test begins: %s%s"LF,adr,fil); test_flush; } /* Already exists? Then, we know that we knew that this link had to be known */ if (adr[0] != '\0' && fil[0] != '\0' && opt->hash != NULL && hash_read(opt->hash, adr, fil, 1, opt->urlhack) >= 0 ) { return 0; /* Yokai */ } // -------------------- PRELUDE OF PHASE 3-BIS -------------------- /* Built-in known tags (<img src=..>, ..) */ if (forbidden_url != 0 && opt->nearlink && tag != NULL && attribute != NULL) { int i; for(i = 0 ; hts_detect_embed[i].tag != NULL ; i++) { if (cmp_token(tag, hts_detect_embed[i].tag) && cmp_token(attribute, hts_detect_embed[i].attr) ) { embedded_triggered = 1; break; } } } // -------------------- PHASE 1 -------------------- /* Doit-on traiter les non html? */ if ((opt->getmode & 2)==0) { // non on ne doit pas if (!ishtml(opt,fil)) { // non il ne faut pas //adr[0]='\0'; // ne pas traiter ce lien, pas traiter forbidden_url=1; // interdire récupération du lien if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"non-html file ignored at %s : %s"LF,adr,fil); test_flush; } } } /* Niveau 1: ne pas parser suivant! 
*/ if (ptr>0) { if ( ( liens[ptr]->depth <= 0 ) || ( liens[ptr]->depth <= 1 && !embedded_triggered ) ) { forbidden_url=1; // interdire récupération du lien if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"file from too far level ignored at %s : %s"LF,adr,fil); test_flush; } } } /* en cas d'échec en phase 1, retour immédiat! */ if (forbidden_url == 1) { return forbidden_url; } // -------------------- PHASE 2 -------------------- // ------------------------------------------------------ // doit-on traiter ce lien?.. vérifier droits de déplacement meme_adresse=strfield2(adr,urladr); if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); if (meme_adresse) fprintf(opt->log,"Compare addresses: %s=%s"LF,adr,urladr); else fprintf(opt->log,"Compare addresses: %s!=%s"LF,adr,urladr); test_flush; } if (meme_adresse) { // même adresse { // tester interdiction de descendre // MODIFIE : en cas de remontée puis de redescente, il se pouvait qu'on ne puisse pas atteindre certains fichiers // problème: si un fichier est virtuellement accessible via une page mais dont le lien est sur une autre *uniquement*.. char BIGSTK tempo[HTS_URLMAXSIZE*2]; char BIGSTK tempo2[HTS_URLMAXSIZE*2]; tempo[0] = tempo2[0] = '\0'; // note (up/down): on calcule à partir du lien primaire, ET du lien précédent. 
// ex: si on descend 2 fois on peut remonter 1 fois if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) { if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) { if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"build relative links to test: %s %s (with %s and %s)"LF,tempo,tempo2,liens[liens[ptr]->premier]->fil,liens[ptr]->fil); test_flush; } // si vient de primary, ne pas tester lienrelatif avec (car host "différent") /*if (liens[liens[ptr]->premier] == 0) { // vient de primary } */ // NEW: finalement OK, sauf pour les moved repérés par link_import // PROBLEME : annulé a cause d'un lien éventuel isolé accepté..qui entrainerait un miroir // (test même niveau (NOUVEAU à cause de certains problèmes de filtres non intégrés)) // NEW if ( (tempo[0] != '\0' && tempo[1] != '\0' && strchr(tempo+1,'/') == 0) || (tempo2[0] != '\0' && tempo2[1] != '\0' && strchr(tempo2+1,'/') == 0) ) { if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil); test_flush; } } } // down if ( (strncmp(tempo,"../",3)) || (strncmp(tempo2,"../",3))) { // pas montée sinon ne nbous concerne pas int test1,test2; if (!strncmp(tempo,"../",3)) test1=0; else test1 = (strchr(tempo +((*tempo =='/')?1:0),'/')!=NULL); if (!strncmp(tempo2,"../",3)) test2=0; else test2 = (strchr(tempo2+((*tempo2=='/')?1:0),'/')!=NULL); if ( (test1) && (test2) ) { // on ne peut que descendre if ((opt->seeker & 1)==0) { // interdiction de descendre forbidden_url=1; if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"lower link canceled: %s%s"LF,adr,fil); test_flush; } } else { // autorisé à priori - NEW if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil); 
test_flush; } } } } else if ( (test1) || (test2) ) { // on peut descendre pour accéder au lien if ((opt->seeker & 1)!=0) { // on peut descendre - NEW if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil); test_flush; } } } } } // up if ( (!strncmp(tempo,"../",3)) && (!strncmp(tempo2,"../",3)) ) { // impossible sans monter if ((opt->seeker & 2)==0) { // interdiction de monter forbidden_url=1; if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"upper link canceled: %s%s"LF,adr,fil); test_flush; } } else { // autorisé à monter - NEW if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil); test_flush; } } } } else if ( (!strncmp(tempo,"../",3)) || (!strncmp(tempo2,"../",3)) ) { // Possible en montant if ((opt->seeker & 2)!=0) { // autorisé à monter - NEW if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil); test_flush; } } } // sinon autorisé en descente } } else { if (opt->log) { fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil); test_flush; } } } else { if (opt->log) { fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil); test_flush; } } } // tester interdiction de descendre? 
{ // tester interdiction de monter char BIGSTK tempo[HTS_URLMAXSIZE*2]; char BIGSTK tempo2[HTS_URLMAXSIZE*2]; if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) { if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) { } else { if (opt->log) { fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil); test_flush; } } } else { if (opt->log) { fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil); test_flush; } } } // fin tester interdiction de monter } else { // adresse différente, sortir? //if (!opt->wizard) { // mode non wizard // doit-on traiter ce lien?.. vérifier droits de sortie switch((opt->travel & 255)) { case 0: if (!opt->wizard) // mode non wizard forbidden_url=1; break; // interdicton de sortir au dela de l'adresse case 1: { // sortie sur le même dom.xxx size_t i = strlen(adr)-1; size_t j = strlen(urladr)-1; while( (i>0) && (adr[i]!='.')) i--; while( (j>0) && (urladr[j]!='.')) j--; i--; j--; while( (i>0) && (adr[i]!='.')) i--; while( (j>0) && (urladr[j]!='.')) j--; if ((i>0) && (j>0)) { if (!strfield2(adr+i,urladr+j)) { // != if (!opt->wizard) { // mode non wizard //printf("refused: %s\n",adr); forbidden_url=1; // pas même domaine if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"foreign domain link canceled: %s%s"LF,adr,fil); test_flush; } } } else { if (opt->wizard) { // mode wizard forbidden_url=0; // même domaine if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"same domain link authorized: %s%s"LF,adr,fil); test_flush; } } } } else forbidden_url=1; } break; case 2: { // sortie sur le même .xxx size_t i = strlen(adr)-1; size_t j = strlen(urladr)-1; while( (i>0) && (adr[i]!='.')) i--; while( (j>0) && (urladr[j]!='.')) j--; if ((i>0) && (j>0)) { if (!strfield2(adr+i,urladr+j)) { // !- if (!opt->wizard) { // mode non wizard //printf("refused: %s\n",adr); forbidden_url=1; // pas même .xx if ((opt->debug>1) && 
(opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"foreign location link canceled: %s%s"LF,adr,fil); test_flush; } } } else { if (opt->wizard) { // mode wizard forbidden_url=0; // même domaine if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"same location link authorized: %s%s"LF,adr,fil); test_flush; } } } } else forbidden_url=1; } break; case 7: // everywhere!! if (opt->wizard) { // mode wizard forbidden_url=0; break; } } // switch // ANCIENNE POS -- récupérer les liens à côtés d'un lien (nearlink) } // fin test adresse identique/différente // -------------------- PHASE 3 -------------------- // récupérer les liens à côtés d'un lien (nearlink) (nvelle pos) if (forbidden_url != 0 && opt->nearlink) { if (!ishtml(opt,fil)) { // non html //printf("ok %s%s\n",ad,fil); forbidden_url=0; // autoriser may_set_prio_to=1+1; // set prio to 1 (parse but skip urls) if near is the winner if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"near link authorized: %s%s"LF,adr,fil); test_flush; } } } // -------------------- PHASE 3-BIS -------------------- /* Built-in known tags (<img src=..>, ..) */ if (forbidden_url != 0 && embedded_triggered) { forbidden_url=0; // autoriser may_set_prio_to=1+1; // set prio to 1 (parse but skip urls) if near is the winner if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"near link authorized (friendly tag): %s%s"LF,adr,fil); test_flush; } } // -------------------- PHASE 4 -------------------- // ------------------------------------------------------ // Si wizard, il se peut qu'on autorise ou qu'on interdise // un lien spécial avant même de tester sa position, sa hiérarchie etc. // peut court-circuiter le forbidden_url précédent if (opt->wizard) { // le wizard entre en action.. 
// int question=1; // poser une question int force_mirror=0; // pour mirror links int filters_answer=0; // décision prise par les filtres char BIGSTK l[HTS_URLMAXSIZE*2]; char BIGSTK lfull[HTS_URLMAXSIZE*2]; if (forbidden_url!=-1) question=0; // pas de question, résolu // former URL complète du lien actuel strcpybuff(l,jump_identification(adr)); if (*fil!='/') strcatbuff(l,"/"); strcatbuff(l,fil); // full version (http://foo:[email protected]/bar.html) if (!link_has_authority(adr)) strcpybuff(lfull,"http://"); else lfull[0]='\0'; strcatbuff(lfull,adr); if (*fil!='/') strcatbuff(lfull,"/"); strcatbuff(lfull,fil); // tester filters (URLs autorisées ou interdites explicitement) // si lien primaire on saute le joker, on est pas lémur if (ptr==0) { // lien primaire, autoriser question=1; // la question sera résolue automatiquement forbidden_url=0; may_set_prio_to=0; // clear may-set flag } else { // eternal depth first // vérifier récursivité extérieure if (opt->extdepth>0) { if ( /*question && */ (ptr>0) && (!force_mirror)) { // well, this is kinda a hak // we don't want to mirror EVERYTHING, and we have to decide where to stop // there is no way yet to tag "external" links, and therefore links that are // "weak" (authorized depth < external depth) are just not considered for external // hack if (liens[ptr]->depth > opt->extdepth) { // *set_prio_to = opt->extdepth + 1; *set_prio_to = 1 + (opt->extdepth); may_set_prio_to=0; // clear may-set flag forbidden_url=0; // autorisé question=0; // résolution auto if ((opt->debug>1) && (opt->log!=NULL)) { if (question) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil); } else { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) forced to accept link (external depth): link %s at %s%s"LF,l,urladr,urlfil); } test_flush; } } } } // filters { int jok; char* mdepth=""; // filters, 0=sait pas 1=ok -1=interdit { int jokDepth1=0,jokDepth2=0; int 
jok1=0,jok2=0; jok1 = fa_strjoker(/*url*/0, _FILTERS,*_FILTERS_PTR,lfull,NULL,NULL,&jokDepth1); jok2 = fa_strjoker(/*url*/0, _FILTERS,*_FILTERS_PTR,l, NULL,NULL,&jokDepth2); if (jok2 == 0) { // #2 doesn't know jok = jok1; // then, use #1 mdepth = _FILTERS[jokDepth1]; } else if (jok1 == 0) { // #1 doesn't know jok = jok2; // then, use #2 mdepth = _FILTERS[jokDepth2]; } else if (jokDepth1 >= jokDepth2) { // #1 matching rule is "after" #2, then it is prioritary jok = jok1; mdepth = _FILTERS[jokDepth1]; } else { // #2 matching rule is "after" #1, then it is prioritary jok = jok2; mdepth = _FILTERS[jokDepth2]; } } if (jok == 1) { // autorisé filters_answer=1; // décision prise par les filtres question=0; // ne pas poser de question, autorisé forbidden_url=0; // URL autorisée may_set_prio_to=0; // clear may-set flag if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit authorized (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil); test_flush; } } else if (jok == -1) { // forbidden filters_answer=1; // décision prise par les filtres question=0; // ne pas poser de question: forbidden_url=1; // URL interdite if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit forbidden (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil); test_flush; } } // sinon on touche à rien } } // vérifier mode mirror links if (question) { if (opt->mirror_first_page) { // mode mirror links if (liens[ptr]->precedent==0) { // parent=primary! forbidden_url=0; // autorisé may_set_prio_to=0; // clear may-set flag question=1; // résolution auto force_mirror=5; // mirror (5) if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit mirror link: link %s at %s%s"LF,l,urladr,urlfil); test_flush; } } } } // on doit poser la question.. peut on la poser? 
// (oui je sais quel preuve de délicatesse, merci merci) if ((question) && (ptr>0) && (!force_mirror)) { if (opt->wizard==2) { // éliminer tous les liens non répertoriés comme autorisés (ou inconnus) question=0; forbidden_url=1; if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) ambiguous forbidden link: link %s at %s%s"LF,l,urladr,urlfil); test_flush; } } } // vérifier robots.txt if (opt->robots) { int r = checkrobots(_ROBOTS,adr,fil); if (r == -1) { // interdiction #if DEBUG_ROBOTS printf("robots.txt forbidden: %s%s\n",adr,fil); #endif // question résolue, par les filtres, et mode robot non strict if ((!question) && (filters_answer) && (opt->robots == 1) && (forbidden_url!=1)) { r=0; // annuler interdiction des robots if (!forbidden_url) { if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Warning link followed against robots.txt: link %s at %s%s"LF,l,adr,fil); test_flush; } } } if (r == -1) { // interdire forbidden_url=1; question=0; if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(robots.txt) forbidden link: link %s at %s%s"LF,l,adr,fil); test_flush; } } } } if (!question) { if ((opt->debug>1) && (opt->log!=NULL)) { if (!forbidden_url) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) shared foreign domain link: link %s at %s%s"LF,l,urladr,urlfil); } else { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) cancelled foreign domain link: link %s at %s%s"LF,l,urladr,urlfil); } test_flush; } #if BDEBUG==3 printf("at %s in %s, wizard says: url %s ",urladr,urlfil,l); if (forbidden_url) printf("cancelled"); else printf(">SHARED<"); printf("\n"); #endif } /* en cas de question, ou lien primaire (enregistrer autorisations) */ if (question || (ptr==0)) { const char* s; int n=0; // si primaire (plus bas) alors ... 
if ((ptr!=0) && (force_mirror==0)) { char BIGSTK tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; strcatbuff(tempo,adr); strcatbuff(tempo,fil); s = RUN_CALLBACK1(opt, query3, tempo); if (strnotempty(s)==0) // entrée n=0; else if (isdigit((unsigned char)*s)) sscanf(s,"%d",&n); else { switch(*s) { case '*': n=-1; break; case '!': n=-999; { /*char *a; int i; a=copie_de_adr-128; if (a<r.adr) a=r.adr; for(i=0;i<256;i++) { if (a==copie_de_adr) printf("\nHERE:\n"); printf("%c",*a++); } printf("\n\n"); */ } break; default: n=-999; printf("What did you say?\n"); break; } } io_flush; } else { // lien primaire: autoriser répertoire entier if (!force_mirror) { if ((opt->seeker & 1)==0) { // interdiction de descendre n=7; } else { n=5; // autoriser miroir répertoires descendants (lien primaire) } } else // forcer valeur (sub-wizard) n=force_mirror; } /* sanity check - reallocate filters HERE */ if ((*_FILTERS_PTR) + 1 >= opt->maxfilter) { opt->maxfilter += HTS_FILTERSINC; if (filters_init(&_FILTERS, opt->maxfilter, HTS_FILTERSINC) == 0) { printf("PANIC! : Too many filters : >%d [%d]\n", (*_FILTERS_PTR),__LINE__); fflush(stdout); if (opt->log) { fprintf(opt->log,LF"Too many filters, giving up..(>%d)"LF, (*_FILTERS_PTR) ); fprintf(opt->log,"To avoid that: use #F option for more filters (example: -#F5000)"LF); test_flush; } assertf("too many filters - giving up" == NULL); // wild.. 
} } // here we have enough room for a new filter if necessary switch(n) { case -1: // sauter tout le reste forbidden_url=1; opt->wizard=2; // sauter tout le reste break; case 0: // interdire les mêmes liens: adr/fil forbidden_url=1; HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(_FILTERS[0],"-"); strcatbuff(_FILTERS[0],jump_identification(adr)); if (*fil!='/') strcatbuff(_FILTERS[0],"/"); strcatbuff(_FILTERS[0],fil); break; case 1: // éliminer répertoire entier et sous rép: adr/path/ * forbidden_url=1; { size_t i = strlen(fil)-1; while((fil[i]!='/') && (i>0)) i--; if (fil[i]=='/') { HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(_FILTERS[0],"-"); strcatbuff(_FILTERS[0],jump_identification(adr)); if (*fil!='/') strcatbuff(_FILTERS[0],"/"); strncatbuff(_FILTERS[0] ,fil,i); if (_FILTERS[0][strlen(_FILTERS[0])-1]!='/') strcatbuff(_FILTERS[0],"/"); strcatbuff(_FILTERS[0],"*"); } } // ** ... break; case 2: // adresse adr* forbidden_url=1; HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(_FILTERS[0],"-"); strcatbuff(_FILTERS[0],jump_identification(adr)); strcatbuff(_FILTERS[0],"*"); break; case 3: // ** A FAIRE forbidden_url=1; /* { int i=strlen(adr)-1; while((adr[i]!='/') && (i>0)) i--; if (i>0) { } }*/ break; // case 4: // same link // PAS BESOIN!! /*HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(_FILTERS[0],"+"); strcatbuff(_FILTERS[0],adr); if (*fil!='/') strcatbuff(_FILTERS[0],"/"); strcatbuff(_FILTERS[0],fil);*/ // étant donné le renversement wizard/primary filter (les primary autorisent up/down ET interdisent) // il faut éviter d'un lien isolé effectue un miroir total.. 
*set_prio_to = 0+1; // niveau de récursion=0 (pas de miroir) break; case 5: // autoriser répertoire entier et fils if ((opt->seeker & 2)==0) { // interdiction de monter size_t i = strlen(fil)-1; while((fil[i]!='/') && (i>0)) i--; if (fil[i]=='/') { HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(_FILTERS[0],"+"); strcatbuff(_FILTERS[0],jump_identification(adr)); if (*fil!='/') strcatbuff(_FILTERS[0],"/"); strncatbuff(_FILTERS[0],fil,i+1); strcatbuff(_FILTERS[0],"*"); } } else { // autoriser domaine alors!! HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(filters[filptr],"+"); strcpybuff(_FILTERS[0],"+"); strcatbuff(_FILTERS[0],jump_identification(adr)); strcatbuff(_FILTERS[0],"*"); } break; case 6: // same domain HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(filters[filptr],"+"); strcpybuff(_FILTERS[0],"+"); strcatbuff(_FILTERS[0],jump_identification(adr)); strcatbuff(_FILTERS[0],"*"); break; // case 7: // autoriser ce répertoire { size_t i = strlen(fil)-1; while((fil[i]!='/') && (i>0)) i--; if (fil[i]=='/') { HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(_FILTERS[0],"+"); strcatbuff(_FILTERS[0],jump_identification(adr)); if (*fil!='/') strcatbuff(_FILTERS[0],"/"); strncatbuff(_FILTERS[0],fil,i+1); strcatbuff(_FILTERS[0],"*[file]"); } } break; case 50: // on fait rien break; } // switch } // test du wizard sur l'url } // fin du test wizard.. // -------------------- PHASE 5 -------------------- // lien non autorisé, peut-on juste le tester? if (just_test_it) { if (forbidden_url==1) { if (opt->travel&256) { // tester tout de même if (strfield(adr,"ftp://")==0 #if HTS_USEMMS && strfield(adr,"mms://")==0 #endif ) { // PAS ftp! 
forbidden_url=1; // oui oui toujours interdit (note: sert à rien car ==1 mais c pour comprendre) *just_test_it=1; // mais on teste if ((opt->debug>1) && (opt->log!=NULL)) { HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Testing link %s%s"LF,adr,fil); } } } } //adr[0]='\0'; // cancel } // -------------------- FINAL PHASE -------------------- // Test if the "Near" test won if (may_set_prio_to && forbidden_url == 0) { *set_prio_to = may_set_prio_to; } return forbidden_url; #undef _FILTERS #undef _FILTERS_PTR #undef _ROBOTS }
/* Note: NOT utf-8 */ int optinclude_file(const char *name, int *argc, char **argv, char *x_argvblk, int *x_ptr) { FILE *fp; fp = fopen(name, "rb"); if (fp) { char line[256]; int insert_after = 1; /* first, insert after program filename */ while(!feof(fp)) { char *a, *b; int result; /* read line */ linput(fp, line, 250); hts_lowcase(line); if (strnotempty(line)) { /* no comment line: # // ; */ if (strchr("#/;", line[0]) == NULL) { /* right trim */ a = line + strlen(line) - 1; while(is_realspace(*a)) *(a--) = '\0'; /* jump "set " and spaces */ a = line; while(is_realspace(*a)) a++; if (strncmp(a, "set", 3) == 0) { if (is_realspace(*(a + 3))) { a += 4; } } while(is_realspace(*a)) a++; /* delete = ("sockets=8") */ if ((b = strchr(a, '='))) *b = ' '; /* isolate option and parameter */ b = a; while((!is_realspace(*b)) && (*b)) b++; if (*b) { *b = '\0'; b++; } /* a is now the option, b the parameter */ { int return_argc; char return_error[256]; char _tmp_argv[4][HTS_CDLMAXSIZE]; char *tmp_argv[4]; tmp_argv[0] = _tmp_argv[0]; tmp_argv[1] = _tmp_argv[1]; tmp_argv[2] = _tmp_argv[2]; tmp_argv[3] = _tmp_argv[3]; strcpybuff(_tmp_argv[0], "--"); strcatbuff(_tmp_argv[0], a); strcpybuff(_tmp_argv[1], b); result = optalias_check(2, (const char *const *) tmp_argv, 0, &return_argc, (tmp_argv + 2), return_error); if (!result) { printf("%s\n", return_error); } else { int insert_after_argc; /* Insert parameters BUT so that they can be in the same order */ /* temporary argc: Number of parameters after minus insert_after_argc */ insert_after_argc = (*argc) - insert_after; cmdl_ins((tmp_argv[2]), insert_after_argc, (argv + insert_after), x_argvblk, (*x_ptr)); *argc = insert_after_argc + insert_after; insert_after++; /* Second one */ if (return_argc > 1) { insert_after_argc = (*argc) - insert_after; cmdl_ins((tmp_argv[3]), insert_after_argc, (argv + insert_after), x_argvblk, (*x_ptr)); *argc = insert_after_argc + insert_after; insert_after++; } /* increment to nbr of used parameters */ /* 
insert_after+=result; */ } } } } } fclose(fp); return 1; } return 0; }
// forme à partir d'un lien et du contexte (origin_fil et origin_adr d'où il est tiré) adr et fil // [adr et fil sont des buffers de 1ko] // 0 : ok // -1 : erreur // -2 : protocole non supporté (ftp) int ident_url_relatif(const char *lien, const char *origin_adr, const char *origin_fil, lien_adrfil* const adrfil) { int ok = 0; int scheme = 0; assertf(adrfil != NULL); adrfil->adr[0] = '\0'; adrfil->fil[0] = '\0'; //effacer buffers // lien non vide! if (strnotempty(lien) == 0) return -1; // erreur! // Scheme? { const char *a = lien; while(isalpha((unsigned char) *a)) a++; if (*a == ':') scheme = 1; } // filtrer les parazites (mailto & cie) // scheme+authority (//) if ((strfield(lien, "http://")) // scheme+// || (strfield(lien, "file://")) // scheme+// || (strncmp(lien, "//", 2) == 0) // // sans scheme (-> default) ) { if (ident_url_absolute(lien, adrfil) == -1) { ok = -1; // erreur URL } } else if (strfield(lien, "ftp://")) { // Note: ftp:foobar.gif is not valid if (ftp_available()) { // ftp supporté if (ident_url_absolute(lien, adrfil) == -1) { ok = -1; // erreur URL } } else { ok = -2; // non supporté } #if HTS_USEOPENSSL } else if (strfield(lien, "https://")) { // Note: ftp:foobar.gif is not valid if (ident_url_absolute(lien, adrfil) == -1) { ok = -1; // erreur URL } #endif } else if ((scheme) && ((!strfield(lien, "http:")) && (!strfield(lien, "https:")) && (!strfield(lien, "ftp:")) )) { ok = -1; // unknown scheme } else { // c'est un lien relatif // On forme l'URL complète à partie de l'url actuelle // et du chemin actuel si besoin est. 
// sanity check if (origin_adr == NULL || origin_fil == NULL || *origin_adr == '\0' || *origin_fil == '\0') { return -1; } // copier adresse if (((int) strlen(origin_adr) < HTS_URLMAXSIZE) && ((int) strlen(origin_fil) < HTS_URLMAXSIZE) && ((int) strlen(lien) < HTS_URLMAXSIZE)) { /* patch scheme if necessary */ if (strfield(lien, "http:")) { lien += 5; strcpybuff(adrfil->adr, jump_protocol_const(origin_adr)); // même adresse ; protocole vide (http) } else if (strfield(lien, "https:")) { lien += 6; strcpybuff(adrfil->adr, "https://"); // même adresse forcée en https strcatbuff(adrfil->adr, jump_protocol_const(origin_adr)); } else if (strfield(lien, "ftp:")) { lien += 4; strcpybuff(adrfil->adr, "ftp://"); // même adresse forcée en ftp strcatbuff(adrfil->adr, jump_protocol_const(origin_adr)); } else { strcpybuff(adrfil->adr, origin_adr); // même adresse ; et même éventuel protocole } if (*lien != '/') { // sinon c'est un lien absolu if (*lien == '\0') { strcpybuff(adrfil->fil, origin_fil); } else if (*lien == '?') { // example: a href="?page=2" char *a; strcpybuff(adrfil->fil, origin_fil); a = strchr(adrfil->fil, '?'); if (a) *a = '\0'; strcatbuff(adrfil->fil, lien); } else { const char *a = strchr(origin_fil, '?'); if (a == NULL) a = origin_fil + strlen(origin_fil); while((*a != '/') && (a > origin_fil)) a--; if (*a == '/') { // ok on a un '/' if ((((int) (a - origin_fil)) + 1 + strlen(lien)) < HTS_URLMAXSIZE) { // copier chemin strncpy(adrfil->fil, origin_fil, ((int) (a - origin_fil)) + 1); *(adrfil->fil + ((int) (a - origin_fil)) + 1) = '\0'; // copier chemin relatif if (((int) strlen(adrfil->fil) + (int) strlen(lien)) < HTS_URLMAXSIZE) { strcatbuff(adrfil->fil, lien + ((*lien == '/') ? 
1 : 0)); // simplifier url pour les ../ fil_simplifie(adrfil->fil); } else ok = -1; // erreur } else { // erreur ok = -1; // erreur URL } } else { // erreur ok = -1; // erreur URL } } } else { // chemin absolu // copier chemin directement strcatbuff(adrfil->fil, lien); fil_simplifie(adrfil->fil); } // *lien!='/' } else ok = -1; } // test news: etc. // case insensitive pour adresse { char *a = jump_identification(adrfil->adr); while(*a) { if ((*a >= 'A') && (*a <= 'Z')) *a += 'a' - 'A'; a++; } } // IDNA / RFC 3492 (Punycode) handling for HTTP(s) if (!link_has_authority(adrfil->adr) || strfield(adrfil->adr, "https:")) { char *const a = jump_identification(adrfil->adr); // Non-ASCII characters (theorically forbidden, but browsers are lenient) if (!hts_isStringAscii(a, strlen(a))) { char *const idna = hts_convertStringUTF8ToIDNA(a, strlen(a)); if (idna != NULL) { if (strlen(idna) < HTS_URLMAXSIZE) { strcpybuff(a, idna); } free(idna); } } } return ok; }