/*
 * "savename" callback: neutralises IIS-unfriendly extensions in the local
 * save name by overwriting each one, in place, with a same-length look-alike
 * (".com" -> ".c0m", ".exe" -> ".ex3", ".dll" -> ".dl1", ".sh" -> ".5h").
 *
 * An extension is rewritten only when it closes a path component, i.e. when
 * it is directly followed by the end of the string, '/' or '\\'.  The match
 * is case-sensitive.
 *
 * Returns 0 when a previously chained "savename" callback returned 0,
 * and 1 otherwise.
 */
static int mysavename(t_hts_callbackarg * carg, httrackp * opt,
                      const char *adr_complete, const char *fil_complete,
                      const char *referer_adr, const char *referer_fil,
                      char *save) {
  /* Both tables are NULL-terminated and MUST stay index-aligned; each
     replacement is exactly as long as the extension it stands in for. */
  static const char *iisBogus[] = { ".com", ".exe", ".dll", ".sh", NULL };
  static const char *iisBogusReplace[] = { ".c0m", ".ex3", ".dl1", ".5h", NULL };
  char *cursor;

  /* Earlier callbacks in the chain run first and may veto. */
  if (CALLBACKARG_PREV_FUN(carg, savename) != NULL
      && !CALLBACKARG_PREV_FUN(carg, savename) (CALLBACKARG_PREV_CARG(carg),
                                                opt, adr_complete,
                                                fil_complete, referer_adr,
                                                referer_fil, save)) {
    return 0;
  }

  /* Try every extension at every position of the save name. */
  for (cursor = save; *cursor != '\0'; cursor++) {
    int ext;

    for (ext = 0; iisBogus[ext] != NULL; ext++) {
      const size_t extLen = strlen(iisBogus[ext]);
      char follower;

      /* strncmp() stops at the terminator of 'cursor', so a name shorter
         than the extension simply fails to match. */
      if (strncmp(cursor, iisBogus[ext], extLen) != 0) {
        continue;
      }
      follower = cursor[extLen];
      if (follower != '\0' && follower != '/' && follower != '\\') {
        continue;
      }

      /* Overwrite the extension only; the rest of the name, terminator
         included, is left untouched. */
      memcpy(cursor, iisBogusReplace[ext], strlen(iisBogusReplace[ext]));
      break;
    }
  }

  return 1;
}
/*
 * "savename" callback: scrambles the local save name with ROT-13.
 *
 * Each character is first passed through TOLOWER(); when the result lies in
 * 'a'..'z' the character is replaced by the lowercase letter 13 places
 * further along the alphabet (wrapping around).  Every other character is
 * left as it is.
 *
 * Returns 0 when a previously chained "savename" callback returned 0,
 * and 1 otherwise.
 */
static int mysavename(t_hts_callbackarg * carg, httrackp * opt,
                      const char *adr_complete, const char *fil_complete,
                      const char *referer_adr, const char *referer_fil,
                      char *save) {
  char *cursor;

  /* Earlier callbacks in the chain run first and may veto. */
  if (CALLBACKARG_PREV_FUN(carg, savename) != NULL
      && !CALLBACKARG_PREV_FUN(carg, savename) (CALLBACKARG_PREV_CARG(carg),
                                                opt, adr_complete,
                                                fil_complete, referer_adr,
                                                referer_fil, save)) {
    return 0;
  }

  cursor = save;
  while (*cursor != 0) {
    const char lowered = TOLOWER(*cursor);

    if (lowered >= 'a' && lowered <= 'z') {
      /* Position in the alphabet, rotated by half its length. */
      const int rotated = ((lowered - 'a') + 13) % 26;

      *cursor = rotated + 'a';
    }
    cursor++;
  }

  return 1;
}
/*
 * "check_html" callback: lets a page be parsed only if it contains at least
 * one of the keywords stored in the user-defined NULL-terminated
 * 'stringfilters' array.
 *
 * The page identified by address "primary" / filename "/primary" (the list
 * of links) is always accepted.  For any other page the filters are tried in
 * order; the first one found in 'html' is reported on stderr together with
 * up to 72 characters of context, and the page is accepted.
 *
 * Returns 1 to parse the page, 0 to skip it (also 0 when a previously
 * chained "check_html" callback returned 0).
 */
static int process(t_hts_callbackarg * carg, httrackp * opt, char *html,
                   int len, const char *address, const char *filename) {
  t_my_userdef *userdef = (t_my_userdef *) CALLBACKARG_USERDEF(carg);
  char **const filters = userdef->stringfilters;
  char **filter;

  /* Earlier callbacks in the chain run first and may veto. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html) (CALLBACKARG_PREV_CARG(carg),
                                                  opt, html, len, address,
                                                  filename)) {
    return 0;
  }

  /* The primary page (list of links) is never filtered. */
  if (strcmp(address, "primary") == 0 && strcmp(filename, "/primary") == 0) {
    return 1;
  }

  for (filter = filters; *filter != NULL; filter++) {
    const char *const hit = strstr(html, *filter);
    int shown;

    if (hit == NULL) {
      continue;
    }

    fprintf(stderr,
            "** callback info: found '%s' keyword in '%s%s', crawling this page!\n",
            *filter, address, filename);
    fprintf(stderr, "** details:\n(..)");
    /* Dump a short excerpt starting at the match; anything that does not
       compare greater than 32 (space, control characters, ...) is shown
       as '?'. */
    for (shown = 0; shown < 72 && hit[shown]; shown++) {
      fprintf(stderr, "%c", hit[shown] > 32 ? hit[shown] : '?');
    }
    fprintf(stderr, "(..)\n");
    return 1;
  }

  fprintf(stderr,
          "** callback info: won't parse '%s%s' (no specified keywords found)\n",
          address, filename);
  return 0;
}
/*
 * "end" callback: announces the end of the mirror on stderr, then hands the
 * user-defined argument (read as a C string) and the text "mirror ended" to
 * hts_log().
 *
 * Returns the status of the previously chained "end" callback when there is
 * one, and 1 otherwise.
 */
static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt) {
  const char *const userString = (char*) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "* mirror end\n");
  hts_log(opt, userString, "mirror ended");

  /* Nothing chained before us: our own status is the final one. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;
  }

  /* Our side went fine, so the earlier callback decides the result. */
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
/*
 * "end" callback: prints a farewell line on stderr.
 *
 * carg  callback argument structure (chain links and user-defined argument)
 * opt   the option settings
 *
 * Returns the status of the previously chained "end" callback when there is
 * one, and 1 otherwise.
 */
static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt) {
  /* Optional user-defined argument; fetched to show how, not used here. */
  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);

  (void) ourDummyArg;

  fprintf(stderr, "That's all, folks!\n");

  /* Nothing chained before us: our own status is the final one. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;
  }

  /* Our side went fine, so the earlier callback decides the result. */
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
/*
 * "loop" callback: draws a one-character text spinner on stdout each time it
 * is invoked ('/', '-', '\', '|', then around again), followed by a carriage
 * return so the next frame overwrites it.
 *
 * Returns 0 when a previously chained "loop" callback returned 0,
 * and 1 otherwise.
 */
static int check_loop(t_hts_callbackarg *carg, httrackp *opt, void* back,
                      int back_max, int back_index, int lien_tot,
                      int lien_ntot, int stat_time, void* stats) {
  static const char spinner[] = "/-\\|";
  /* Index of the next frame.  It is kept inside [0, frames) at all times:
     an ever-growing signed counter would eventually overflow (undefined
     behaviour) and a negative value would index before the string. */
  static unsigned int fun_animation = 0;

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, loop) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, loop)(CALLBACKARG_PREV_CARG(carg), opt,
                                          back, back_max, back_index,
                                          lien_tot, lien_ntot, stat_time,
                                          stats)) {
      return 0;                 /* Abort */
    }
  }

  /* Process */
  printf("%c\r", spinner[fun_animation]);
  /* sizeof counts the terminating NUL, hence the -1. */
  fun_animation = (fun_animation + 1u) % (unsigned int) (sizeof(spinner) - 1);

  return 1;
}
/*
 * "end" callback: reports that it was called (on stderr) and releases the
 * user-defined structure attached to the callback argument.
 *
 * Returns the status of the previously chained "end" callback when there is
 * one, and 1 otherwise.
 */
static int end(t_hts_callbackarg *carg, httrackp *opt) {
  t_my_userdef *const state = (t_my_userdef*) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "** info: wrapper_exit() called!\n");

  /* free() accepts NULL, so a missing structure needs no special case. */
  free(state);

  /* Nothing chained before us: report success. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;
  }
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
/*
 * "check_html" callback: prints the URL of the page and its content on
 * stdout.
 *
 * carg         callback argument structure (chain links, user-defined arg)
 * opt          the option settings
 * html         page content; printed with "%s", so it is read up to its
 *              first NUL byte
 * len          not used by this callback itself, only forwarded to the
 *              chained callback
 * url_address  address part of the page URL
 * url_file     file part of the page URL
 *
 * Returns 0 when a previously chained "check_html" callback returned 0,
 * and 1 otherwise.
 */
static int process_file(t_hts_callbackarg *carg, httrackp *opt,
                        char* html, int len,
                        const char* url_address, const char* url_file) {
  /* Optional user-defined argument; fetched to show how, not used here. */
  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);

  (void) ourDummyArg;

  /* Earlier callbacks in the chain run first and may veto.  This part can
     be dropped if previous callbacks should not be called. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg),
                                                 opt, html, len,
                                                 url_address, url_file)) {
    return 0;
  }

  printf("file %s%s content: %s\n", url_address, url_file, html);

  return 1;
}
/*
 * "end" callback: prints "Unplugged .." on stderr and releases the string
 * stored as the user-defined argument.
 *
 * Returns the status of the previously chained "end" callback when there is
 * one, and 1 otherwise.
 */
static int check_detectedlink_end(t_hts_callbackarg *carg, httrackp *opt) {
  char *const ownedBase = (char*) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "Unplugged ..\n");

  /* free() accepts NULL, so a missing string needs no special case. */
  free(ownedBase);

  /* Nothing chained before us: report success. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;
  }
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
/*
 * "linkdetected" callback: prints, on stdout, the detected link next to the
 * URL recorded in the user-defined structure ("[page] -> [link]").
 *
 * Returns 0 when a previously chained "linkdetected" callback returned 0,
 * and 1 otherwise.
 */
static int check_detectedlink(t_hts_callbackarg *carg, httrackp *opt,
                              char* link) {
  t_my_userdef *state = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
  char * const sourcePage = state->currentURLBeingParsed;

  /* Earlier callbacks in the chain run first and may veto. */
  if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL
      && !CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg),
                                                   opt, link)) {
    return 0;
  }

  printf("[%s] -> [%s]\n", sourcePage, link);

  return 1;
}
/*
 * "check_html" callback: disables the page's base href, if any, by
 * overwriting the 'B' of the first `<BASE HREF="` found in 'html' with 'X'
 * (giving `<XASE HREF="`).  The search is case-sensitive and only the first
 * occurrence is touched.
 *
 * Returns 0 when a previously chained "check_html" callback returned 0,
 * and 1 otherwise.
 */
static int process_file(t_hts_callbackarg *carg, httrackp* opt, char* html,
                        int len, const char* url_address,
                        const char* url_file) {
  char *baseTag;

  /* Earlier callbacks in the chain run first and may veto. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg),
                                                 opt, html, len,
                                                 url_address, url_file)) {
    return 0;
  }

  baseTag = strstr(html, "<BASE HREF=\"");
  if (baseTag != NULL) {
    /* baseTag[0] is '<'; index 1 is the first letter of the tag name. */
    baseTag[1] = 'X';
  }

  return 1;
}
/*
 * "detect" callback: claims the file described by 'str' when detect_mime()
 * recognises it, recording libName in str->wrapper_name as the claimant.
 *
 * A previously chained "detect" callback is asked first and keeps priority:
 * when it returns non-zero this function returns 1 without touching 'str'.
 *
 * Returns 1 when the format is taken (by an earlier callback or by this
 * one), 0 when it is unknown.
 */
static int hts_detect_java(t_hts_callbackarg * carg, httrackp * opt,
                           htsmoduleStruct * str) {
  /* Found before us: let the earlier callback have the priority. */
  if (CALLBACKARG_PREV_FUN(carg, detect) != NULL
      && CALLBACKARG_PREV_FUN(carg, detect) (CALLBACKARG_PREV_CARG(carg), opt,
                                             str)) {
    return 1;
  }

  /* Unknown format: not ours. */
  if (!detect_mime(str)) {
    return 0;
  }

  /* Known format: sign it with our ID and take it. */
  str->wrapper_name = libName;
  return 1;
}
/*
 * "check_html" callback: announces on stdout which page is about to be
 * parsed and records its URL (address followed by file) in the user-defined
 * structure's currentURLBeingParsed buffer.
 *
 * Returns 0 when a previously chained "check_html" callback returned 0,
 * and 1 otherwise.
 */
static int process_file(t_hts_callbackarg *carg, httrackp *opt, char* html,
                        int len, const char* url_address,
                        const char* url_file) {
  t_my_userdef *state = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
  char * const urlSlot = state->currentURLBeingParsed;

  /* Earlier callbacks in the chain run first and may veto. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg),
                                                 opt, html, len,
                                                 url_address, url_file)) {
    return 0;
  }

  printf("now parsing %s%s..\n", url_address, url_file);

  /* NOTE(review): the copy is unbounded; this assumes the destination can
     hold address + file + terminator -- confirm against the definition of
     t_my_userdef. */
  strcpy(urlSlot, url_address);
  strcat(urlSlot, url_file);

  return 1;
}
/*
 * "linkdetected" callback: prepends the user-defined base string to every
 * detected link that starts with "http://" or "https://".  Other links are
 * left untouched.
 *
 * 'link' is a read/write buffer that is at least HTS_URLMAXSIZE bytes long.
 * The rewrite is done in place and only when base + link + terminator fit
 * in HTS_URLMAXSIZE bytes; otherwise (or when no base string is set) the
 * link is left unchanged rather than written past the guaranteed size.
 *
 * Returns 0 when a previously chained "linkdetected" callback returned 0,
 * and 1 otherwise.
 */
static int check_detectedlink(t_hts_callbackarg *carg, httrackp* opt,
                              char* link) {
  const char *base = (char*) CALLBACKARG_USERDEF(carg);
  size_t baseLen;
  size_t linkLen;

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg),
                                                  opt, link)) {
      return 0;                 /* Abort */
    }
  }

  /* Nothing to prepend. */
  if (base == NULL) {
    return 1;
  }

  /* Only absolute http(s) links are rewritten. */
  if (strncmp(link, "http://", 7) != 0 && strncmp(link, "https://", 8) != 0) {
    return 1;
  }

  baseLen = strlen(base);
  linkLen = strlen(link);

  /* Need baseLen + linkLen + 1 <= HTS_URLMAXSIZE, written so that the
     arithmetic itself cannot wrap around. */
  if (baseLen >= (size_t) HTS_URLMAXSIZE
      || linkLen >= (size_t) HTS_URLMAXSIZE - baseLen) {
    return 1;                   /* would not fit: keep the link as-is */
  }

  /* Shift the link (terminator included) to make room, then drop the base
     in front of it.  memmove() is required: source and destination
     overlap. */
  memmove(link + baseLen, link, linkLen + 1);
  memcpy(link, base, baseLen);

  return 1;                     /* success */
}
/*
 * "check_html" callback: reports the page being parsed, once on stderr and
 * once through hts_log() (called with "log-wrapper-info" as its second
 * argument and the message as its third).
 *
 * Logging is best effort: if the message buffer cannot be allocated the
 * hts_log() call is skipped and the callback still succeeds.
 *
 * Returns 0 when a previously chained "check_html" callback returned 0,
 * and 1 otherwise.
 */
static int process_file(t_hts_callbackarg *carg, httrackp *opt, char* html,
                        int len, const char* url_address,
                        const char* url_file) {
  /* Optional user-defined argument; fetched to show how, not used here. */
  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);
  char *message;

  (void) ourDummyArg;

  /* call parent functions if multiple callbacks are chained. you can skip
     this part, if you don't want previous callbacks to be called. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg),
                                                opt, html, len,
                                                url_address, url_file)) {
      return 0;                 /* abort */
    }
  }

  /* log */
  fprintf(stderr, "* parsing file %s%s\n", url_address, url_file);

  /* Both URL parts plus generous room for the fixed text and terminator. */
  message = malloc(strlen(url_address) + strlen(url_file) + 128);
  if (message != NULL) {
    sprintf(message, " parsing file %s%s", url_address, url_file);
    hts_log(opt, "log-wrapper-info", message);
    free(message);
  }

  return 1;                     /* success */
}
/*
 * "parse" callback: scans a Java class file and registers, through
 * str->addLink(), a "<name>.class" link for the classes it references.
 *
 * When str->wrapper_name is not libName the file is not ours: the call is
 * forwarded to the previously chained "parse" callback, or fails with an
 * error message when there is none.
 *
 * Otherwise, if detect_mime() accepts the file:
 *   1. the file named by str->filename is opened and its first 10 bytes are
 *      read into a JAVA_HEADER (magic and count are byte-swapped when
 *      reverse_endian() says so); the magic must be 0xCAFEBABE;
 *   2. a table of header.count entries is filled with readtable(), starting
 *      at index 1;
 *   3. two shorts are read, then every HTS_CLASS entry whose index1 names a
 *      valid table slot is turned into a link unless it is filtered out
 *      (see the loop for the exact conditions).
 *
 * Returns 1 on success (or the chained callback's result when forwarding),
 * 0 on error; on most error paths a message is left in str->err_msg.
 */
static int hts_parse_java(t_hts_callbackarg * carg, httrackp * opt,
                          htsmoduleStruct * str) {
  /* The wrapper_name member has changed: not for us anymore */
  if (str->wrapper_name == NULL || strcmp(str->wrapper_name, libName) != 0) {
    /* Call parent functions if multiple callbacks are chained. */
    if (CALLBACKARG_PREV_FUN(carg, parse) != NULL) {
      return CALLBACKARG_PREV_FUN(carg, parse) (CALLBACKARG_PREV_CARG(carg),
                                                opt, str);
    }
    strcpy(str->err_msg,
           "unexpected error: bad wrapper_name and no previous wrapper");
    return 0;                   /* Unexpected error */
  } else {
    if (detect_mime(str)) {
      /* (Legacy code) */
      char catbuff[CATBUFF_SIZE];
      FILE *fpout;
      JAVA_HEADER header;
      RESP_STRUCT *tab;
      const char *file = str->filename;

      str->relativeToHtmlLink = 1;

#if JAVADEBUG
      printf("fopen\n");
#endif
      if ((fpout =
           FOPEN(fconv(catbuff, sizeof(catbuff), file), "r+b")) == NULL) {
        /* (disabled) fprintf(stderr, "Cannot open input file.\n"); */
        sprintf(str->err_msg, "Unable to open file %s", file);
        return 0;               /* an error.. */
      }
#if JAVADEBUG
      printf("fread\n");
#endif
      /* Only the first 10 bytes are read, not sizeof(JAVA_HEADER); the
         earlier sizeof-based read was disabled:
         if (fread(&header,1,sizeof(JAVA_HEADER),fpout) != sizeof(JAVA_HEADER)) */
      if (fread(&header, 1, 10, fpout) != 10) {   /* incomplete.. */
        fclose(fpout);
        sprintf(str->err_msg,
                "File header too small (file len = " LLintP ")",
                (LLint) fsize(file));
        return 0;
      }
#if JAVADEBUG
      printf("header\n");
#endif
      /* check the header */
      if (reverse_endian()) {
        header.magic = hts_swap32(header.magic);
        header.count = hts_swap16(header.count);
      }
      if (header.magic != 0xCAFEBABE) {
        sprintf(str->err_msg, "non java file");
        if (fpout) {
          fclose(fpout);
          fpout = NULL;
        }
        return 0;
      }
      /* One zero-initialised slot per announced entry. */
      tab = (RESP_STRUCT *) calloc(header.count, sizeof(RESP_STRUCT));
      if (!tab) {
        /* NOTE(review): the size reported here is that of ONE entry, not of
           the whole header.count-entry table. */
        sprintf(str->err_msg, "Unable to alloc %d bytes",
                (int) sizeof(RESP_STRUCT));
        if (fpout) {
          fclose(fpout);
          fpout = NULL;
        }
        return 0;               /* error.. */
      }
#if JAVADEBUG
      printf("calchead\n");
#endif
      {
        int i;

        /* Fill the table; slot 0 is never read. */
        for(i = 1; i < header.count; i++) {
          int err = 0;

          tab[i] = readtable(str, fpout, tab[i], &err);
          if (!err) {
            /* long and double entries take two slots: skip the next one */
            if ((tab[i].type == HTS_LONG) || (tab[i].type == HTS_DOUBLE))
              i++;
          } else {              /* an error occurred! */
            /* Keep readtable()'s own message if it left one. */
            if (strnotempty(str->err_msg) == 0)
              strcpy(str->err_msg, "Internal readtable error");
            free(tab);
            if (fpout) {
              fclose(fpout);
              fpout = NULL;
            }
            return 0;
          }
        }
      }
#if JAVADEBUG
      printf("addfiles\n");
#endif
      {
        /* (disabled) unsigned int acess; */
        unsigned int Class;
        unsigned int SClass;
        int i;

        /* (disabled) acess = readshort(fpout);
           NOTE(review): with this read disabled, Class and SClass are the
           first and second shorts at the current file position.  If an
           access-flags field precedes the class indices here, both values
           are shifted by one field -- confirm against the class-file layout
           and readtable()/readshort(). */
        Class = readshort(fpout);
        SClass = readshort(fpout);
        for(i = 1; i < header.count; i++) {
          if (tab[i].type == HTS_CLASS) {
            if ((tab[i].index1 < header.count) && (tab[i].index1 >= 0)) {
              /* Skip entries whose index1 equals either short read above,
                 and names starting with '['. */
              if ((tab[i].index1 != SClass) && (tab[i].index1 != Class)
                  && (tab[tab[i].index1].name[0] != '[')) {
                /* Names containing "java/" are not added either. */
                if (!strstr(tab[tab[i].index1].name, "java/")) {
                  /* NOTE(review): assumes name + ".class" fits in 1024
                     bytes -- confirm against RESP_STRUCT. */
                  char BIGSTK tempo[1024];

                  tempo[0] = '\0';
                  sprintf(tempo, "%s.class", tab[tab[i].index1].name);
#if JAVADEBUG
                  printf("add %s\n", tempo);
#endif
                  if (tab[tab[i].index1].file_position >= 0)
                    str->addLink(str, tempo);   /* tab[tab[i].index1].file_position */
                }
              }
            } else {
              /* index1 is out of range: stop scanning */
              i = header.count; /* exit */
            }
          }
        }
      }
#if JAVADEBUG
      printf("end\n");
#endif
      free(tab);
      if (fpout) {
        fclose(fpout);
        fpout = NULL;
      }
      return 1;
    } else {
      strcpy(str->err_msg, "bad MIME type");
    }
  }
  return 0;                     /* Error */
}