/*
  "savename" callback: replaces all "offending" IIS extensions (.com, .exe,
  .dll, .sh) found in the save name with "nice" look-alikes.

  An extension is only rewritten when it is immediately followed by the end
  of the string, a '/' or a '\\'. The replacement has the same length as the
  extension it replaces, so the string is patched in place and never grows.

  carg .. referer_fil: forwarded untouched to the previous callback, if any.
  save: the save name, modified in place.
  Returns 0 if a chained previous callback returned 0 (abort), 1 otherwise.
*/
static int mysavename(t_hts_callbackarg * carg, httrackp * opt,
                      const char *adr_complete, const char *fil_complete,
                      const char *referer_adr, const char *referer_fil,
                      char *save) {
  /* Parallel tables: entry k of iisBogusReplace replaces entry k of iisBogus.
     They MUST have the same number of entries. */
  static const char *iisBogus[] = { ".com", ".exe", ".dll", ".sh", NULL };
  static const char *iisBogusReplace[] = { ".c0m", ".ex3", ".dl1", ".5h", NULL };
  char *cursor;

  /* Give the previously registered callback (if any) the first go. */
  if (CALLBACKARG_PREV_FUN(carg, savename) != NULL
      && !CALLBACKARG_PREV_FUN(carg, savename)
          (CALLBACKARG_PREV_CARG(carg), opt, adr_complete, fil_complete,
           referer_adr, referer_fil, save)) {
    return 0;                   /* Abort */
  }

  /* Try every known extension at every position of the save name. */
  for(cursor = save; *cursor != '\0'; cursor++) {
    int k;

    for(k = 0; iisBogus[k] != NULL; k++) {
      const size_t extLen = strlen(iisBogus[k]);
      char following;

      if (strncmp(cursor, iisBogus[k], extLen) != 0)
        continue;

      /* The extension must end a path component to count as a match. */
      following = cursor[extLen];
      if (following == '\0' || following == '/' || following == '\\') {
        /* Overwrite the extension only; the terminator is deliberately not copied. */
        memcpy(cursor, iisBogusReplace[k], strlen(iisBogusReplace[k]));
        break;
      }
    }
  }

  return 1;                     /* success */
}
/*
  "savename" callback: applies ROT-13 to every letter of the save name.

  Each character is first passed through TOLOWER; when the result lies in
  'a'..'z' the character is replaced by the rotated letter computed from that
  TOLOWER result. Other characters are left untouched.

  carg .. referer_fil: forwarded untouched to the previous callback, if any.
  save: the save name, modified in place.
  Returns 0 if a chained previous callback returned 0 (abort), 1 otherwise.
*/
static int mysavename(t_hts_callbackarg * carg, httrackp * opt,
                      const char *adr_complete, const char *fil_complete,
                      const char *referer_adr, const char *referer_fil,
                      char *save) {
  char *cursor;

  /* Give the previously registered callback (if any) the first go. */
  if (CALLBACKARG_PREV_FUN(carg, savename) != NULL
      && !CALLBACKARG_PREV_FUN(carg, savename)
          (CALLBACKARG_PREV_CARG(carg), opt, adr_complete, fil_complete,
           referer_adr, referer_fil, save)) {
    return 0;                   /* Abort */
  }

  /* Rotate letters in place, skipping anything that is not a letter. */
  for(cursor = save; *cursor != 0; cursor++) {
    const char lowered = TOLOWER(*cursor);

    if (lowered < 'a' || lowered > 'z')
      continue;
    *cursor = (((lowered - 'a') + 13) % 26) + 'a';      /* ROT-13 */
  }

  return 1;                     /* success */
}
/*
  "check_html" callback: only lets a page be parsed when it contains at least
  one of the keywords listed in the user-defined stringfilters array.

  The page whose address is "primary" and filename is "/primary" is always
  accepted. For any other page the NULL-terminated keyword list is scanned in
  order; on the first keyword found in html, a diagnostic (including up to 72
  characters of context starting at the match) is written to stderr.

  Returns 1 to parse the page, 0 to skip it (or when a chained previous
  callback returned 0).
*/
static int process(t_hts_callbackarg * carg, httrackp * opt, char *html,
                   int len, const char *address, const char *filename) {
  t_my_userdef *userdef = (t_my_userdef *) CALLBACKARG_USERDEF(carg);
  char **const stringfilters = userdef->stringfilters;
  char **filter;

  /* Give the previously registered callback (if any) the first go. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)
          (CALLBACKARG_PREV_CARG(carg), opt, html, len, address, filename)) {
    return 0;                   /* Abort */
  }

  /* The primary page (list of links) is always parsed. */
  if (strcmp(address, "primary") == 0 && strcmp(filename, "/primary") == 0)
    return 1;

  /* Stop at the first keyword that occurs in the page. */
  for(filter = stringfilters; *filter != NULL; filter++) {
    const char *const hit = strstr(html, *filter);
    int k;

    if (hit == NULL)
      continue;

    fprintf(stderr,
            "** callback info: found '%s' keyword in '%s%s', crawling this page!\n",
            *filter, address, filename);
    fprintf(stderr, "** details:\n(..)");
    /* Show the context; anything not greater than 32 is shown as '?'. */
    for(k = 0; k < 72 && hit[k]; k++) {
      fputc(hit[k] > 32 ? hit[k] : '?', stderr);
    }
    fprintf(stderr, "(..)\n");
    return 1;                   /* success */
  }

  fprintf(stderr,
          "** callback info: won't parse '%s%s' (no specified keywords found)\n",
          address, filename);
  return 0;                     /* no keyword found: don't parse this page */
}
Esempio n. 4
0
/*
  Local function called as "end" callback.

  Writes a notice to stderr and logs "mirror ended" through hts_log(), passing
  the user-defined argument (treated as a string) as the second argument.
  Returns the status of the chained previous "end" callback when there is one,
  1 (success) otherwise.
*/
static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt) {
  const char *arginfo = (char*) CALLBACKARG_USERDEF(carg);

  fputs("* mirror end\n", stderr);
  hts_log(opt, arginfo, "mirror ended");

  /* Nothing chained before us: our own status is the final one. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;  /* success */
  }

  /* Status is ok on our side, so report the previous callback's status.
     This call can be skipped if previous callbacks should not run. */
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
Esempio n. 5
0
/*
  Local function called as "end" callback.

  carg: the carg structure, holding various information.
  opt:  the option settings.

  Prints a farewell message on stderr, then returns the status of the chained
  previous "end" callback when there is one, 1 (success) otherwise.
*/
static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt) {
  /* Optional user-defined argument; fetched for illustration, not used. */
  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);

  (void) ourDummyArg;

  /* processing */
  fputs("That's all, folks!\n", stderr);

  /* Status is ok on our side: hand over to the previous callback if chained.
     This part can be skipped if previous callbacks should not be called. */
  return CALLBACKARG_PREV_FUN(carg, end) != NULL
    ? CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt)
    : 1;  /* success */
}
/*
  "loop" callback: draws a one-character spinner on stdout at each call.

  All parameters other than carg are only forwarded to the chained previous
  callback; this function does not inspect them.

  Returns 0 if a chained previous callback returned 0 (abort), 1 otherwise.
*/
static int check_loop(t_hts_callbackarg *carg, httrackp *opt, void* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,void* stats) {
  static const char frames[] = "/-\\|";
  /* Index of the next frame, always kept in [0, frameCount). A bounded
     unsigned index replaces the former ever-growing signed counter, which
     would eventually overflow (undefined behaviour) and could then yield a
     negative, out-of-bounds index. */
  static unsigned int fun_animation = 0;
  const unsigned int frameCount = (unsigned int) (sizeof(frames) - 1);

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, loop) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, loop)(CALLBACKARG_PREV_CARG(carg), opt, back, back_max, back_index, lien_tot, lien_ntot, stat_time, stats)) {
      return 0;  /* Abort */
    }
  }

  /* Process: print the current frame, then return to the start of the line
     so that the next frame overwrites it. */
  printf("%c\r", frames[fun_animation]);
  fun_animation = (fun_animation + 1u) % frameCount;
  return 1;
}
/*
  "end" callback: releases the user-defined structure attached to carg.

  Prints a notice on stderr, frees the user-defined pointer, then returns the
  status of the chained previous "end" callback when there is one, 1 (success)
  otherwise.
*/
static int end(t_hts_callbackarg *carg, httrackp *opt) {
  t_my_userdef *const userdef = (t_my_userdef*) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "** info: wrapper_exit() called!\n");

  /* free(NULL) is a no-op, so no guard is needed. */
  free(userdef);

  /* Nothing chained before us: we are done. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;  /* success */
  }

  /* Otherwise report the previous callback's status. */
  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
Esempio n. 8
0
/*
  Local function called as "check_html" callback.

  carg: the carg structure, holding various information.
  opt:  the option settings.
  html, len, url_address, url_file: callback-specific parameters; html is
  printed with "%s", i.e. it is treated as a NUL-terminated string here.

  Prints the file's address, name and content on stdout.
  Returns 0 if a chained previous callback returned 0 (abort), 1 otherwise.
*/
static int process_file(t_hts_callbackarg *carg, httrackp *opt,
                        char* html, int len, const char* url_address, const char* url_file) {
  /* Optional user-defined argument; fetched for illustration, not used. */
  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);

  (void) ourDummyArg;

  /* Run the previous callback first, if callbacks are chained. This part can
     be skipped if previous callbacks should not be called. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt,
                                                 html, len, url_address, url_file)) {
    return 0;  /* abort */
  }

  printf("file %s%s content: %s\n", url_address, url_file, html);
  return 1;  /* success */
}
/*
  "end" callback: frees the user-defined string attached to carg.

  Prints a notice on stderr, frees the user-defined pointer, then returns the
  status of the chained previous "end" callback when there is one, 1 (success)
  otherwise.
*/
static int check_detectedlink_end(t_hts_callbackarg *carg, httrackp *opt) {
  char *const base = (char*) CALLBACKARG_USERDEF(carg);

  fputs("Unplugged ..\n", stderr);

  /* free(NULL) is a no-op, so no guard is needed. */
  free(base);

  /* Hand over to the previous callback if callbacks are chained. */
  return CALLBACKARG_PREV_FUN(carg, end) != NULL
    ? CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt)
    : 1;  /* success */
}
/*
  "linkdetected" callback: prints, on stdout, each detected link together with
  the URL stored in the user-defined structure's currentURLBeingParsed field.

  Returns 0 if a chained previous callback returned 0 (abort), 1 otherwise.
*/
static int check_detectedlink(t_hts_callbackarg *carg, httrackp *opt, char* link) {
  t_my_userdef *userdef = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
  const char *const parentUrl = userdef->currentURLBeingParsed;

  /* Give the previously registered callback (if any) the first go. */
  if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL
      && !CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg), opt, link)) {
    return 0;  /* Abort */
  }

  /* Report the "parent page -> link" relation. */
  printf("[%s] -> [%s]\n", parentUrl, link);

  return 1;  /* success */
}
/*
  "check_html" callback: disables the first <BASE HREF="..."> tag of the page.

  The first occurrence of the exact text '<BASE HREF="' in html has its second
  character overwritten with 'X' (giving '<XASE HREF="'); the search is
  case-sensitive and only the first occurrence is touched.

  Returns 0 if a chained previous callback returned 0 (abort), 1 otherwise.
*/
static int process_file(t_hts_callbackarg *carg, httrackp* opt, char* html, int len, const char* url_address, const char* url_file) {
  char *baseTag;

  /* Give the previously registered callback (if any) the first go. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
    return 0;  /* Abort */
  }

  /* Disable base href, if any */
  baseTag = strstr(html, "<BASE HREF=\"");
  if (baseTag != NULL) {
    baseTag[1] = 'X';
  }

  return 1;  /* success */
}
Esempio n. 12
0
/*
  "detect" callback: claims the file described by str when detect_mime()
  accepts it.

  A chained previous callback is asked first; if it returns non-zero it keeps
  priority and 1 is returned without touching str. Otherwise, when
  detect_mime(str) is non-zero, str->wrapper_name is set to libName and 1 is
  returned. Returns 0 for an unknown format.
*/
static int hts_detect_java(t_hts_callbackarg * carg, httrackp * opt,
                           htsmoduleStruct * str) {
  /* Found before us: let the previous callback have the priority. */
  if (CALLBACKARG_PREV_FUN(carg, detect) != NULL
      && CALLBACKARG_PREV_FUN(carg, detect)
          (CALLBACKARG_PREV_CARG(carg), opt, str)) {
    return 1;
  }

  /* Check MIME */
  if (!detect_mime(str)) {
    return 0;                   /* Unknown format */
  }

  str->wrapper_name = libName;  /* Our ID */
  return 1;                     /* Known format, we take it */
}
/*
  "check_html" callback: records the URL of the page being parsed.

  Prints a notice on stdout, then stores url_address followed by url_file in
  the user-defined structure's currentURLBeingParsed buffer.

  NOTE(review): the copy is unbounded; the size of currentURLBeingParsed is
  not visible here - confirm it can hold url_address + url_file.

  Returns 0 if a chained previous callback returned 0 (abort), 1 otherwise.
*/
static int process_file(t_hts_callbackarg *carg, httrackp *opt, char* html, int len, const char* url_address, const char* url_file) {
  t_my_userdef *userdef = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
  char *const urlSlot = userdef->currentURLBeingParsed;

  /* Give the previously registered callback (if any) the first go. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
      && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
    return 0;  /* Abort */
  }

  /* Announce the page, then remember its full URL (address + file). */
  printf("now parsing %s%s..\n", url_address, url_file);
  strcpy(urlSlot, url_address);
  strcat(urlSlot, url_file);

  return 1;  /* success */
}
/*
  "linkdetected" callback: prefixes every absolute http:// or https:// link
  with the user-defined base string.

  link is a read/write buffer that is at least HTS_URLMAXSIZE bytes long. The
  prefix is only applied when base + link + terminator fits in HTS_URLMAXSIZE
  bytes; otherwise (or when no base string is attached) the link is left
  unchanged rather than overflowing the buffer.

  Returns 0 if a chained previous callback returned 0 (abort), 1 otherwise.
*/
static int check_detectedlink(t_hts_callbackarg *carg, httrackp* opt, char* link) {
  const char *base = (char*) CALLBACKARG_USERDEF(carg);

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg), opt, link)) {
      return 0;  /* Abort */
    }
  }

  /* The incoming (read/write) buffer is at least HTS_URLMAXSIZE bytes long */
  if (base != NULL
      && (strncmp(link, "http://", 7) == 0 || strncmp(link, "https://", 8) == 0)) {
    const size_t capacity = (size_t) HTS_URLMAXSIZE;
    const size_t baseLen = strlen(base);
    const size_t linkLen = strlen(link);

    /* Written so that the check itself cannot overflow:
       baseLen + linkLen + 1 <= capacity. */
    if (baseLen < capacity && linkLen < capacity - baseLen) {
      /* Shift the link (terminator included) to the right, then drop the
         base in front of it; memmove is required as the regions overlap. */
      memmove(link + baseLen, link, linkLen + 1);
      memcpy(link, base, baseLen);
    }
  }

  return 1;  /* success */
}
Esempio n. 15
0
/*
  Local function called as "check_html" callback.

  Reports the file being parsed on stderr and through hts_log() (under the
  "log-wrapper-info" prefix argument). The hts_log() message is built in a
  temporary heap buffer; if that allocation fails the hts_log() call is
  skipped, since logging is best-effort and must not abort the parsing.

  Returns 0 if a chained previous callback returned 0 (abort), 1 otherwise.
*/
static int process_file(t_hts_callbackarg *carg, httrackp *opt, 
                        char* html, int len, const char* url_address, const char* url_file) {
  size_t msgSize;
  char *msg;

  /* call parent functions if multiple callbacks are chained. you can skip this part, if you don't want previous callbacks to be called. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt,
                                                html, len, url_address, url_file)) {
        return 0;  /* abort */
      }
  }

  /* log */
  fprintf(stderr, "* parsing file %s%s\n", url_address, url_file);

  /* Room for both strings plus the fixed text (128 bytes of headroom). */
  msgSize = strlen(url_address) + strlen(url_file) + 128;
  msg = malloc(msgSize);
  if (msg != NULL) {
    snprintf(msg, msgSize, " parsing file %s%s", url_address, url_file);
    hts_log(opt, "log-wrapper-info", msg);
    free(msg);
  }

  return 1;  /* success */
}
Esempio n. 16
0
/*
  "parse" callback for Java class files.

  When str->wrapper_name is not libName, the work is delegated to the chained
  previous "parse" callback; if there is none, an error is reported.

  Otherwise, when detect_mime(str) accepts the file, str->filename is opened
  and processed as follows:
    - a 10-byte header is read and its magic number must be 0xCAFEBABE
      (magic and count are byte-swapped first when reverse_endian() says so);
    - a table of header.count entries is allocated and entries 1..count-1 are
      filled by readtable(); an HTS_LONG or HTS_DOUBLE entry uses two slots;
    - two shorts (Class, SClass) are read;
    - for every HTS_CLASS entry whose index1 designates a valid slot other
      than Class/SClass, and whose name neither starts with '[' nor contains
      "java/", the link "<name>.class" is submitted through str->addLink().

  Returns 1 on success, 0 on error (str->err_msg then holds a message).
*/
static int hts_parse_java(t_hts_callbackarg * carg, httrackp * opt,
                          htsmoduleStruct * str) {
  /* The wrapper_name member has changed: not for us anymore */
  if (str->wrapper_name == NULL || strcmp(str->wrapper_name, libName) != 0) {
    /* Call parent functions if multiple callbacks are chained. */
    if (CALLBACKARG_PREV_FUN(carg, parse) != NULL) {
      return CALLBACKARG_PREV_FUN(carg, parse) (CALLBACKARG_PREV_CARG(carg),
                                                opt, str);
    }
    strcpy(str->err_msg,
           "unexpected error: bad wrapper_name and no previous wrapper");
    return 0;                   /* Unexpected error */
  } else {
    if (detect_mime(str)) {

      /* (Legacy code) */
      char catbuff[CATBUFF_SIZE];
      FILE *fpout;
      JAVA_HEADER header;
      RESP_STRUCT *tab;
      const char *file = str->filename;

      str->relativeToHtmlLink = 1;

#if JAVADEBUG
      printf("fopen\n");
#endif
      if ((fpout = FOPEN(fconv(catbuff, sizeof(catbuff), file), "r+b")) == NULL) {
        sprintf(str->err_msg, "Unable to open file %s", file);
        return 0;               /* error */
      }
#if JAVADEBUG
      printf("fread\n");
#endif
      /* Only the first 10 bytes are read into the header structure. */
      if (fread(&header, 1, 10, fpout) != 10) { /* incomplete header */
        fclose(fpout);
        sprintf(str->err_msg, "File header too small (file len = " LLintP ")",
                (LLint) fsize(file));
        return 0;
      }
#if JAVADEBUG
      printf("header\n");
#endif
      /* check the header */
      if (reverse_endian()) {
        header.magic = hts_swap32(header.magic);
        header.count = hts_swap16(header.count);
      }
      if (header.magic != 0xCAFEBABE) {
        sprintf(str->err_msg, "non java file");
        if (fpout) {
          fclose(fpout);
          fpout = NULL;
        }
        return 0;
      }

      tab = (RESP_STRUCT *) calloc(header.count, sizeof(RESP_STRUCT));
      if (!tab) {
        sprintf(str->err_msg, "Unable to alloc %d bytes",
                (int) sizeof(RESP_STRUCT));
        if (fpout) {
          fclose(fpout);
          fpout = NULL;
        }
        return 0;               /* error */
      }
#if JAVADEBUG
      printf("calchead\n");
#endif
      {
        int i;

        /* Fill the table; slot 0 is never read from the file. */
        for(i = 1; i < header.count; i++) {
          int err = 0;

          tab[i] = readtable(str, fpout, tab[i], &err);
          if (!err) {
            if ((tab[i].type == HTS_LONG) || (tab[i].type == HTS_DOUBLE))
              i++;              /* long and double entries use two slots */
          } else {              /* an error occurred */
            if (strnotempty(str->err_msg) == 0)
              strcpy(str->err_msg, "Internal readtable error");
            free(tab);
            if (fpout) {
              fclose(fpout);
              fpout = NULL;
            }
            return 0;
          }
        }

      }

#if JAVADEBUG
      printf("addfiles\n");
#endif
      {
        unsigned int Class;
        unsigned int SClass;
        int i;

        /* NOTE(review): an earlier "acess = readshort(fpout);" read used to
           precede these two and is no longer performed. In the JVM class-file
           layout a 2-byte access_flags field sits between the constant pool
           and this_class/super_class, so Class presumably receives the access
           flags and SClass the this_class index - confirm this is intended. */
        Class = readshort(fpout);
        SClass = readshort(fpout);

        for(i = 1; i < header.count; i++) {

          if (tab[i].type == HTS_CLASS) {

            if ((tab[i].index1 < header.count) && (tab[i].index1 >= 0)) {

              if ((tab[i].index1 != SClass) && (tab[i].index1 != Class)
                  && (tab[tab[i].index1].name[0] != '[')) {

                if (!strstr(tab[tab[i].index1].name, "java/")) {
                  char BIGSTK tempo[1024];
                  int written;

                  tempo[0] = '\0';

                  /* Bounded formatting: the name comes from the parsed file,
                     so it must not be allowed to overflow the buffer. */
                  written = snprintf(tempo, sizeof(tempo), "%s.class",
                                     tab[tab[i].index1].name);
#if JAVADEBUG
                  printf("add %s\n", tempo);
#endif
                  /* A truncated name would be a bogus link: skip it. */
                  if (written >= 0 && (size_t) written < sizeof(tempo)
                      && tab[tab[i].index1].file_position >= 0)
                    str->addLink(str, tempo);
                }

              }
            } else {
              i = header.count; /* index out of range: leave the loop */
            }
          }

        }
      }

#if JAVADEBUG
      printf("end\n");
#endif
      free(tab);
      if (fpout) {
        fclose(fpout);
        fpout = NULL;
      }
      return 1;

    } else {
      strcpy(str->err_msg, "bad MIME type");
    }
  }
  return 0;                     /* Error */
}