/*
 * "check_html" callback: report on stderr every page whose address contains
 * "dianying.yisou.com".
 *
 * html/len           page buffer and its length as passed by the caller
 * url_address/file   the two halves of the page URL
 *
 * Always returns 1.
 */
static int __cdecl htsshow_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_address,const char* url_file)
{
    (void) carg;
    (void) opt;
    (void) html;

    if (strstr(url_address, "dianying.yisou.com") != NULL) {
        /* Report the page length in KiB. sizeof(html) would only yield the
           size of the pointer, and a size_t does not match "%ld". */
        fprintf(stderr, "Parsing html file: http://%s%s [%ldk]\n",
                url_address, url_file, (long) len / 1024L);
    }
    return 1;
}
/*
 * "check_html" callback: only let a page be parsed when its content contains
 * one of the NULL-terminated list of keywords stored in the user-defined
 * argument (userdef->stringfilters).
 *
 * Returns 0 when a chained parent callback returns 0 or when no keyword is
 * found; returns 1 for the "primary" page and for any page that contains a
 * keyword.
 */
static int process(t_hts_callbackarg * carg, httrackp * opt, char *html, int len, const char *address, const char *filename)
{
    t_my_userdef *state = (t_my_userdef *) CALLBACKARG_USERDEF(carg);
    char **const filters = state->stringfilters;
    int idx;

    /* Chained callbacks get the first say; a zero result aborts. */
    if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
        && !CALLBACKARG_PREV_FUN(carg, check_html) (CALLBACKARG_PREV_CARG(carg), opt, html, len, address, filename)) {
        return 0;
    }

    /* The primary page (list of links) is always accepted. */
    if (strcmp(address, "primary") == 0 && strcmp(filename, "/primary") == 0) {
        return 1;
    }

    /* Stop at the first keyword that occurs in the page. */
    for (idx = 0; filters[idx] != NULL; idx++) {
        const char *hit = strstr(html, filters[idx]);
        int k;

        if (hit == NULL) {
            continue;
        }

        fprintf(stderr, "** callback info: found '%s' keyword in '%s%s', crawling this page!\n", filters[idx], address, filename);
        fprintf(stderr, "** details:\n(..)");
        /* Show up to 72 characters starting at the match; anything that is
           not greater than 32 is shown as '?'. */
        for (k = 0; k < 72 && hit[k] != '\0'; k++) {
            fprintf(stderr, "%c", hit[k] > 32 ? hit[k] : '?');
        }
        fprintf(stderr, "(..)\n");
        return 1;
    }

    fprintf(stderr, "** callback info: won't parse '%s%s' (no specified keywords found)\n", address, filename);
    return 0;
}
/*
 * "end" callback: announce the end of the mirror on stderr and through
 * hts_log(), using the user-defined argument as hts_log()'s second argument.
 *
 * Returns the status of the chained parent "end" callback when there is one,
 * otherwise 1 (success).
 */
static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt)
{
    const char *log_tag = (char *) CALLBACKARG_USERDEF(carg);

    fprintf(stderr, "* mirror end\n");
    hts_log(opt, log_tag, "mirror ended");

    /* Nothing chained before us: our own status is the result. */
    if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
        return 1;
    }

    /* Our side succeeded, so the previous callback's status decides. */
    return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
/*
 * "end" callback: print a farewell message on stderr.
 *
 * carg  callback argument structure (gives access to the user-defined
 *       argument and to any chained parent callback)
 * opt   option settings
 *
 * Returns the status of the chained parent "end" callback when there is one,
 * otherwise 1 (success).
 */
static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt)
{
    /* Optional user-defined argument; fetched but not used here. */
    void *user_arg = (void *) CALLBACKARG_USERDEF(carg);

    (void) user_arg;

    fprintf(stderr, "That's all, folks!\n");

    if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
        return 1;
    }

    /* Our side succeeded, so the previous callback's status decides. */
    return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
/*
 * "end" callback: release the user-defined argument attached to this
 * callback, then hand over to any chained parent "end" callback.
 *
 * Returns the parent callback's status when there is one, otherwise 1.
 */
static int end(t_hts_callbackarg *carg, httrackp *opt)
{
    t_my_userdef *state = (t_my_userdef *) CALLBACKARG_USERDEF(carg);

    fprintf(stderr, "** info: wrapper_exit() called!\n");

    /* free() accepts NULL, so no guard is required. */
    free(state);

    if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
        return 1;
    }
    return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
/*
 * "check_html" callback: dump every page's URL and content on stdout.
 *
 * carg                callback argument structure
 * opt                 option settings
 * html, len           page buffer and its length (html is printed with "%s",
 *                     so it is treated as a NUL-terminated string)
 * url_address/file    the two halves of the page URL
 *
 * Returns 0 when a chained parent callback returns 0, otherwise 1.
 */
static int process_file(t_hts_callbackarg *carg, httrackp *opt, char* html, int len, const char* url_address, const char* url_file)
{
    /* Optional user-defined argument; fetched but not used here. */
    void *user_arg = (void *) CALLBACKARG_USERDEF(carg);

    (void) user_arg;

    /* Chained callbacks get the first say; a zero result aborts. */
    if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
        && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
        return 0;
    }

    printf("file %s%s content: %s\n", url_address, url_file, html);
    return 1;
}
/*
 * "end" callback: release the string stored as the user-defined argument,
 * then hand over to any chained parent "end" callback.
 *
 * Returns the parent callback's status when there is one, otherwise 1.
 */
static int check_detectedlink_end(t_hts_callbackarg *carg, httrackp *opt)
{
    char *base_url = (char *) CALLBACKARG_USERDEF(carg);

    fprintf(stderr, "Unplugged ..\n");

    /* free() accepts NULL, so no guard is required. */
    free(base_url);

    if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
        return 1;
    }
    return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
/*
 * "linkdetected" callback: print "[current page] -> [link]" on stdout for
 * every detected link, where the current page is read from
 * userdef->currentURLBeingParsed.
 *
 * Returns 0 when a chained parent callback returns 0, otherwise 1.
 */
static int check_detectedlink(t_hts_callbackarg *carg, httrackp *opt, char* link)
{
    t_my_userdef *state = (t_my_userdef *) CALLBACKARG_USERDEF(carg);
    char *const current_url = state->currentURLBeingParsed;

    /* Chained callbacks get the first say; a zero result aborts. */
    if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL
        && !CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg), opt, link)) {
        return 0;
    }

    printf("[%s] -> [%s]\n", current_url, link);
    return 1;
}
/*
 * "check_html" callback: announce the page being parsed on stdout and store
 * its URL (url_address followed by url_file) in
 * userdef->currentURLBeingParsed.
 *
 * Returns 0 when a chained parent callback returns 0, otherwise 1.
 */
static int process_file(t_hts_callbackarg *carg, httrackp *opt, char* html, int len, const char* url_address, const char* url_file)
{
    t_my_userdef *state = (t_my_userdef *) CALLBACKARG_USERDEF(carg);
    char *const current_url = state->currentURLBeingParsed;

    /* Chained callbacks get the first say; a zero result aborts. */
    if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
        && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
        return 0;
    }

    printf("now parsing %s%s..\n", url_address, url_file);

    /* NOTE(review): these copies are unbounded; the capacity of
       currentURLBeingParsed is not visible here -- confirm that it can
       always hold url_address + url_file plus the terminator. */
    strcpy(current_url, url_address);
    strcat(current_url, url_file);
    return 1;
}
/*
 * "linkdetected" callback: prefix every absolute http:// or https:// link
 * with the base string stored as the user-defined argument.
 *
 * link  read/write buffer holding the detected link; it is at least
 *       HTS_URLMAXSIZE bytes long
 *
 * The link is rewritten in place, and only when the prefixed result
 * (including its terminator) fits in HTS_URLMAXSIZE bytes. Otherwise, or
 * when no base string is set, the link is left unchanged.
 *
 * Returns 0 when a chained parent callback returns 0, otherwise 1.
 */
static int check_detectedlink(t_hts_callbackarg *carg, httrackp* opt, char* link)
{
    const char *base = (char *) CALLBACKARG_USERDEF(carg);

    /* Chained callbacks get the first say; a zero result aborts. */
    if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL) {
        if (!CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg), opt, link)) {
            return 0;
        }
    }

    if (base != NULL
        && (strncmp(link, "http://", 7) == 0 || strncmp(link, "https://", 8) == 0)) {
        const size_t capacity = (size_t) HTS_URLMAXSIZE;
        const size_t base_len = strlen(base);
        const size_t link_len = strlen(link);

        /* Require base_len + link_len + 1 <= capacity, written so that the
           subtraction cannot wrap around. */
        if (base_len < capacity && link_len < capacity - base_len) {
            /* Shift the link (terminator included) to the right, then drop
               the base in front of it; memmove because the ranges overlap. */
            memmove(link + base_len, link, link_len + 1);
            memcpy(link, base, base_len);
        }
    }
    return 1;
}
/*
 * "check_html" callback: report every parsed page on stderr and through
 * hts_log().
 *
 * html, len           page buffer and its length (only forwarded to the
 *                     chained parent callback)
 * url_address/file    the two halves of the page URL
 *
 * Returns 0 when a chained parent callback returns 0, otherwise 1. A failed
 * allocation only skips the hts_log() entry; it does not abort.
 */
static int process_file(t_hts_callbackarg *carg, httrackp *opt, char* html, int len, const char* url_address, const char* url_file)
{
    /* Room for the fixed part of the message and its terminator. */
    enum { LOG_MSG_OVERHEAD = 128 };
    char *msg;

    /* Chained callbacks get the first say; a zero result aborts. */
    if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
        if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
            return 0;
        }
    }

    fprintf(stderr, "* parsing file %s%s\n", url_address, url_file);

    msg = malloc(strlen(url_address) + strlen(url_file) + LOG_MSG_OVERHEAD);
    if (msg != NULL) {
        /* Bounded: the buffer holds both strings plus LOG_MSG_OVERHEAD
           bytes, which is more than the fixed text needs. */
        sprintf(msg, " parsing file %s%s", url_address, url_file);
        hts_log(opt, "log-wrapper-info", msg);
        free(msg);
    }
    return 1;
}