예제 #1
0
파일: main.c 프로젝트: rainkid/httrack
/*
 * "check_html" callback: announces on stderr every page whose address
 * contains "dianying.yisou.com".
 *
 * carg         callback argument chain (not used here)
 * opt          option settings (not used here)
 * html         page buffer (not used here)
 * len          size reported for the page; presumably the byte length of
 *              html -- confirm against the caller. Printed in units of 1024.
 * url_address  address part of the page URL
 * url_file     path part of the page URL
 *
 * Always returns 1.
 */
static int __cdecl htsshow_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_address,const char* url_file) {
	if (strstr(url_address, "dianying.yisou.com")) {
		/* Report the page size, not sizeof(html), which is only the size of
		   a pointer. The cast keeps the argument in line with %ld. */
		fprintf(stderr, "Parsing html file: http://%s%s   [%ldk]\n",url_address, url_file, (long) len / 1024);
	}
	return 1;
}
/*
 * "check_html" callback: accepts a page only when its content contains one
 * of the keywords listed in the user-defined data (stringfilters, a
 * NULL-terminated array of strings).
 *
 * Returns 0 when a previously chained callback returns 0 or when no keyword
 * is found in html; returns 1 for the "primary" page and for any page that
 * contains a keyword. The first matching keyword is reported on stderr
 * together with up to 72 characters of the text starting at the match.
 */
static int process(t_hts_callbackarg * carg, httrackp * opt, char *html,
                   int len, const char *address, const char *filename) {
  t_my_userdef *const data = (t_my_userdef *) CALLBACKARG_USERDEF(carg);
  char **const keywords = data->stringfilters;
  char **kw;

  /* Earlier callbacks in the chain run first; a zero from them aborts. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)
           (CALLBACKARG_PREV_CARG(carg), opt, html, len, address, filename)) {
    return 0;
  }

  /* The primary page (list of links) is always accepted. */
  if (strcmp(address, "primary") == 0 && strcmp(filename, "/primary") == 0) {
    return 1;
  }

  /* Stop at the first keyword present in the page. */
  for (kw = keywords; *kw != NULL; kw++) {
    const char *const hit = strstr(html, *kw);
    int k;

    if (hit == NULL) {
      continue;
    }

    fprintf(stderr,
            "** callback info: found '%s' keyword in '%s%s', crawling this page!\n",
            *kw, address, filename);
    fprintf(stderr, "** details:\n(..)");
    /* Excerpt: characters whose value is not above 32 are shown as '?'. */
    for (k = 0; k < 72 && hit[k]; k++) {
      fputc(hit[k] > 32 ? hit[k] : '?', stderr);
    }
    fprintf(stderr, "(..)\n");
    return 1;
  }

  fprintf(stderr,
          "** callback info: won't parse '%s%s' (no specified keywords found)\n",
          address, filename);
  return 0;
}
예제 #3
0
/*
 * Local function called as "end" callback.
 *
 * Prints a notice on stderr and writes "mirror ended" through hts_log(),
 * passing the user-defined argument (read as a string) as its second
 * argument. If a previous "end" callback is chained, its result is returned;
 * otherwise 1 (success) is returned.
 */
static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt) {
  const char *const tag = (char*) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "* mirror end\n");
  hts_log(opt, tag, "mirror ended");

  /* Nothing chained before us: our own status is the final one. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;
  }

  /* Our side is fine, so the chained callback's status decides. */
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
예제 #4
0
/*
 * Local function called as "end" callback.
 *
 * carg  the carg structure, holding various information
 * opt   the option settings
 *
 * Prints a farewell line on stderr. If a previous "end" callback is chained,
 * its result is returned; otherwise 1 (success) is returned.
 */
static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt) {
  /* Optional user-defined argument; fetched but not used here. */
  void *userArg = (void*) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "That's all, folks!\n");

  /* Nothing chained before us: report success ourselves. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;
  }

  /* Our side is fine, so the chained callback's status decides. */
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
예제 #5
0
/*
 * "end" callback: releases the user-defined data attached to carg.
 *
 * Prints a notice on stderr and frees the user-defined pointer. The pointer
 * stored inside carg itself is not modified. If a previous "end" callback is
 * chained, its result is returned; otherwise 1 (success) is returned.
 */
static int end(t_hts_callbackarg *carg, httrackp *opt) {
  t_my_userdef *const data = (t_my_userdef*) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "** info: wrapper_exit() called!\n");
  free(data);                   /* free(NULL) is a no-op */

  /* Hand over to the previous callback when several are chained. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;
  }
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
예제 #6
0
/*
 * Local function called as "check_html" callback.
 *
 * carg   the carg structure, holding various information
 * opt    the option settings
 * html, len, url_address, url_file   callback-specific parameters
 *
 * Prints the page address, path and content (html, as a C string) on stdout.
 * Returns 0 (abort) when a previously chained callback returns 0, and
 * 1 (success) otherwise.
 */
static int process_file(t_hts_callbackarg *carg, httrackp *opt,
                        char* html, int len, const char* url_address, const char* url_file) {
  /* Optional user-defined argument; fetched but not used here. */
  void *userArg = (void*) CALLBACKARG_USERDEF(carg);

  /* Previous callbacks in the chain run first and may veto the page. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt,
                                                 html, len, url_address, url_file)) {
    return 0;
  }

  printf("file %s%s content: %s\n", url_address, url_file, html);
  return 1;
}
예제 #7
0
/*
 * "end" callback: releases the string kept as user-defined data.
 *
 * Prints a notice on stderr and frees the user-defined pointer. The pointer
 * stored inside carg itself is not modified. If a previous "end" callback is
 * chained, its result is returned; otherwise 1 (success) is returned.
 */
static int check_detectedlink_end(t_hts_callbackarg *carg, httrackp *opt) {
  char *const prefix = (char*) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "Unplugged ..\n");
  free(prefix);                 /* free(NULL) is a no-op */

  /* Hand over to the previous callback when several are chained. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;
  }
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
예제 #8
0
/*
 * "linkdetected" callback: prints each detected link on stdout next to the
 * URL recorded in the user-defined data (currentURLBeingParsed).
 *
 * Returns 0 (abort) when a previously chained callback returns 0, and
 * 1 (success) otherwise.
 */
static int check_detectedlink(t_hts_callbackarg *carg, httrackp *opt, char* link) {
  t_my_userdef *const data = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
  char *const parsedUrl = data->currentURLBeingParsed;

  /* Previous callbacks in the chain run first and may veto the link. */
  if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL
      && !CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg), opt, link)) {
    return 0;
  }

  printf("[%s] -> [%s]\n", parsedUrl, link);
  return 1;
}
예제 #9
0
/*
 * "check_html" callback: records the URL of the page about to be parsed in
 * the user-defined data (currentURLBeingParsed) and announces it on stdout.
 *
 * Returns 0 (abort) when a previously chained callback returns 0, and
 * 1 (success) otherwise.
 */
static int process_file(t_hts_callbackarg *carg, httrackp *opt, char* html, int len, const char* url_address, const char* url_file) {
  t_my_userdef *const data = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
  char *const parsedUrl = data->currentURLBeingParsed;

  /* Previous callbacks in the chain run first and may veto the page. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
    return 0;
  }

  printf("now parsing %s%s..\n", url_address, url_file);

  /* Store address followed by path.
     NOTE(review): both copies are unbounded and the capacity of
     currentURLBeingParsed is not visible here -- confirm it can hold
     url_address plus url_file. */
  strcpy(parsedUrl, url_address);
  strcat(parsedUrl, url_file);

  return 1;
}
예제 #10
0
/*
 * "linkdetected" callback: prepends the string kept as user-defined data
 * (base) to every detected link that starts with "http://" or "https://".
 *
 * link is rewritten in place. The incoming (read/write) buffer is at least
 * HTS_URLMAXSIZE bytes long, so the rewrite is performed only when the
 * combined string, including its terminator, fits in HTS_URLMAXSIZE bytes;
 * otherwise, or when no base string is set, link is left unchanged.
 *
 * Returns 0 (abort) when a previously chained callback returns 0, and
 * 1 (success) otherwise.
 */
static int check_detectedlink(t_hts_callbackarg *carg, httrackp* opt, char* link) {
  const char *base = (char*) CALLBACKARG_USERDEF(carg);

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg), opt, link)) {
      return 0;  /* Abort */
    }
  }

  if (base != NULL
      && (strncmp(link, "http://", 7) == 0 || strncmp(link, "https://", 8) == 0)) {
    const size_t max_size = (size_t) HTS_URLMAXSIZE;
    const size_t base_len = strlen(base);
    const size_t link_len = strlen(link);

    /* Written as a subtraction so the check itself cannot overflow:
       base_len + link_len + 1 <= max_size. */
    if (base_len < max_size && link_len < max_size - base_len) {
      /* Shift the link (with its terminator) right, then drop the base in
         front of it. The regions overlap, hence memmove for the shift. */
      memmove(link + base_len, link, link_len + 1);
      memcpy(link, base, base_len);
    }
  }

  return 1;  /* success */
}
예제 #11
0
/*
 * Local function called as "check_html" callback.
 *
 * Announces the page being parsed on stderr and writes the same information
 * through hts_log() under the "log-wrapper-info" prefix. The log message is
 * built in a temporary heap buffer; if that allocation fails, the log entry
 * is skipped and the callback still succeeds.
 *
 * Returns 0 (abort) when a previously chained callback returns 0, and
 * 1 (success) otherwise.
 */
static int process_file(t_hts_callbackarg *carg, httrackp *opt, 
                        char* html, int len, const char* url_address, const char* url_file) {
  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);    /*optional user-defined arg*/
  /* sizeof of the literal counts its terminator, so this is the exact size
     of the formatted message. */
  const size_t msg_size = strlen(url_address) + strlen(url_file) + sizeof(" parsing file ");
  char *msg;

  /* call parent functions if multiple callbacks are chained. you can skip this part, if you don't want previous callbacks to be called. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt,
                                                html, len, url_address, url_file)) {
        return 0;  /* abort */
      }
  }

  /* log */
  fprintf(stderr, "* parsing file %s%s\n", url_address, url_file);
  msg = malloc(msg_size);
  if (msg != NULL) {
    snprintf(msg, msg_size, " parsing file %s%s", url_address, url_file);
    hts_log(opt, "log-wrapper-info", msg);
    free(msg);
  }

  return 1;  /* success */
}