std::vector<SubstringOccurence> IndexSearcher::simpleSearch( boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<saidx_t> > SA, const std::string & pattern) throw(ConcordiaException) { std::vector<SubstringOccurence> result; int left; std::vector<INDEX_CHARACTER_TYPE> hash = hashGenerator->generateHash(pattern); saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE); sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash); int size = sa_search(T->data(), (saidx_t) T->size(), (const sauchar_t *) patternArray, patternLength, SA->data(), (saidx_t) SA->size(), &left); for (int i = 0; i < size; ++i) { saidx_t resultPos = SA->at(left + i); if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { // As we are looking for a pattern in an array of higher // resolution than the hashed index file, we might // obtain accidental results exceeding the boundaries // of characters in hashed index. The above check // removes these accidental results. saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos); result.push_back(SubstringOccurence(marker)); } } delete[] patternArray; return result; }
/*
 * Searches for the pattern P in the text T through the suffix array SA and
 * hands the hit range to the callback stored in option->func.
 *
 * option->maxcount limits the number of reported hits:
 *   - zero      : the callback is not invoked at all,
 *   - positive  : the hit count is capped at maxcount,
 *   - negative  : the hit count is passed on unchanged.
 */
static
void
_search_file(const sauchar_t *T, saidx_t Tsize,
             const sauchar_t *P, saidx_t Psize,
             const saidx_t *SA, saidx_t SAsize,
             searchoption_t *option) {
  saidx_t first;
  saidx_t count = sa_search(T, Tsize, P, Psize, SA, SAsize, &first);

  /* A limit of zero means there is nothing to report. */
  if(option->maxcount == 0) {
    return;
  }

  /* A positive limit truncates the range; a negative one leaves it alone. */
  if((0 < option->maxcount) && (option->maxcount < count)) {
    count = option->maxcount;
  }

  option->func(T, Tsize, P, Psize, SA, SAsize, first, count, option);
}
int main(int argc, const char *argv[]) { FILE *fp; const char *P; sauchar_t *T; saidx_t *SA; LFS_OFF_T n; size_t Psize; saidx_t i, size, left; if((argc == 1) || (strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } if(argc != 4) { print_help(argv[0], EXIT_FAILURE); } P = argv[1]; Psize = strlen(P); /* Open a file for reading. */ #if HAVE_FOPEN_S if(fopen_s(&fp, argv[2], "rb") != 0) { #else if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) { #endif fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]); perror(NULL); exit(EXIT_FAILURE); } /* Get the file size. */ if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { n = LFS_FTELL(fp); rewind(fp); if(n < 0) { fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]); perror(NULL); exit(EXIT_FAILURE); } } else { fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]); perror(NULL); exit(EXIT_FAILURE); } /* Allocate 5n bytes of memory. */ T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); if((T == NULL) || (SA == NULL)) { fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); exit(EXIT_FAILURE); } /* Read n bytes of data. */ if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { fprintf(stderr, "%s: %s `%s': ", argv[0], (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", argv[2]); perror(NULL); exit(EXIT_FAILURE); } fclose(fp); /* Open the SA file for reading. */ #if HAVE_FOPEN_S if(fopen_s(&fp, argv[3], "rb") != 0) { #else if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) { #endif fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]); perror(NULL); exit(EXIT_FAILURE); } /* Read n * sizeof(saidx_t) bytes of data. */ if(fread(SA, sizeof(saidx_t), (size_t)n, fp) != (size_t)n) { fprintf(stderr, "%s: %s `%s': ", argv[0], (ferror(fp) || !feof(fp)) ? 
"Cannot read from" : "Unexpected EOF in", argv[3]); perror(NULL); exit(EXIT_FAILURE); } fclose(fp); /* Search and print */ size = sa_search(T, (saidx_t)n, (const sauchar_t *)P, (saidx_t)Psize, SA, (saidx_t)n, &left); for(i = 0; i < size; ++i) { fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]); } /* Deallocate memory. */ free(SA); free(T); return 0; }