コード例 #1
0
ファイル: index_searcher.cpp プロジェクト: rjawor/concordia
/**
 * Looks up every occurrence of a pattern in a hashed index by means of
 * its suffix array.
 *
 * The pattern is hashed with the supplied generator, flattened to a byte
 * array and searched for with sa_search(). Each hit that starts on an
 * index-character boundary is translated into the marker stored at the
 * corresponding position and wrapped in a SubstringOccurence.
 *
 * \param hashGenerator generator used to hash the pattern
 * \param T byte-level text the suffix array refers to
 * \param markers markers, addressed by index-character position
 * \param SA suffix array of T
 * \param pattern the text to look for
 * \returns one SubstringOccurence per boundary-aligned hit
 * \throws ConcordiaException the only exception type this function
 *         is declared to let through
 */
std::vector<SubstringOccurence> IndexSearcher::simpleSearch(
                  boost::shared_ptr<HashGenerator> hashGenerator,
                  boost::shared_ptr<std::vector<sauchar_t> > T,
                  boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
                  boost::shared_ptr<std::vector<saidx_t> > SA,
                  const std::string & pattern) throw(ConcordiaException) {
    // Turn the pattern into index characters, then into the raw bytes
    // that the byte-oriented suffix array search operates on.
    std::vector<INDEX_CHARACTER_TYPE> patternHash =
                                 hashGenerator->generateHash(pattern);
    const saidx_t patternByteCount =
                        patternHash.size()*sizeof(INDEX_CHARACTER_TYPE);
    // This buffer is released with delete[] at the end of the function.
    sauchar_t * patternBytes =
                        Utils::indexVectorToSaucharArray(patternHash);

    // matchCount hits are read from SA starting at index firstMatch.
    int firstMatch;
    const int matchCount = sa_search(T->data(), (saidx_t) T->size(),
                                     (const sauchar_t *) patternBytes,
                                     patternByteCount,
                                     SA->data(), (saidx_t) SA->size(),
                                     &firstMatch);

    std::vector<SubstringOccurence> occurences;
    for (int k = 0; k < matchCount; ++k) {
        const saidx_t bytePos = SA->at(firstMatch + k);

        // The search runs at byte resolution, which is finer than the
        // resolution of the hashed index, so a hit may begin in the
        // middle of an index character. Such accidental hits are
        // dropped here.
        if (bytePos % sizeof(INDEX_CHARACTER_TYPE) != 0) {
            continue;
        }

        const saidx_t characterPos =
                              bytePos / sizeof(INDEX_CHARACTER_TYPE);
        SUFFIX_MARKER_TYPE hitMarker = markers->at(characterPos);
        result_push:
        occurences.push_back(SubstringOccurence(hitMarker));
    }

    delete[] patternBytes;
    return occurences;
}
コード例 #2
0
ファイル: sasearch.c プロジェクト: cpockrandt/pr_dict_cmp
/*
 * Searches the text T for the pattern P through the suffix array SA and
 * passes the resulting range of hits to the callback option->func.
 *
 * option->maxcount limits how many hits are reported:
 *   - negative: no limit is applied,
 *   - zero:     the callback is not invoked at all,
 *   - positive: the hit count handed to the callback is capped at it.
 */
static
void
_search_file(const sauchar_t *T, saidx_t Tsize,
             const sauchar_t *P, saidx_t Psize,
             const saidx_t *SA, saidx_t SAsize,
             searchoption_t *option) {
    saidx_t first;
    saidx_t count = sa_search(T, Tsize, P, Psize, SA, SAsize, &first);

    /* A limit of zero means there is nothing to report. */
    if(option->maxcount == 0) {
        return;
    }

    /* A positive limit truncates the reported range. */
    if((0 < option->maxcount) && (option->maxcount < count)) {
        count = option->maxcount;
    }

    option->func(T, Tsize, P, Psize, SA, SAsize, first, count, option);
}
コード例 #3
0
ファイル: sasearch.c プロジェクト: Buttonwood/Sibelia
int
main(int argc, const char *argv[]) {
  FILE *fp;
  const char *P;
  sauchar_t *T;
  saidx_t *SA;
  LFS_OFF_T n;
  size_t Psize;
  saidx_t i, size, left;

  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 4) { print_help(argv[0], EXIT_FAILURE); }

  P = argv[1];
  Psize = strlen(P);

  /* Open a file for reading. */
#if HAVE_FOPEN_S
  if(fopen_s(&fp, argv[2], "rb") != 0) {
#else
  if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) {
#endif
    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Get the file size. */
  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
    n = LFS_FTELL(fp);
    rewind(fp);
    if(n < 0) {
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Allocate 5n bytes of memory. */
  T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t));
  SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t));
  if((T == NULL) || (SA == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  /* Read n bytes of data. */
  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      argv[2]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  fclose(fp);

  /* Open the SA file for reading. */
#if HAVE_FOPEN_S
  if(fopen_s(&fp, argv[3], "rb") != 0) {
#else
  if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) {
#endif
    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Read n * sizeof(saidx_t) bytes of data. */
  if(fread(SA, sizeof(saidx_t), (size_t)n, fp) != (size_t)n) {
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      argv[3]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  fclose(fp);

  /* Search and print */
  size = sa_search(T, (saidx_t)n,
                   (const sauchar_t *)P, (saidx_t)Psize,
                   SA, (saidx_t)n, &left);
  for(i = 0; i < size; ++i) {
    fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]);
  }

  /* Deallocate memory. */
  free(SA);
  free(T);

  return 0;
}