int main(int argc, char **argv) { static const char *stopwordFilename = "/home/compilers/media/assn-4-rss-news-search-data/stop-words.txt"; static const int kStopwordBuckets = 1009; static const int kIndexNumBuckets = 10007; rssData allData; HashSetNew(&allData.stopwords, sizeof(char*), kStopwordBuckets, StringHash, StringCmp, StringFree); HashSetNew(&allData.indices, sizeof(indexEntry), kIndexNumBuckets, IndexHash, IndexCmp, IndexFree); // this vector VectorNew(&allData.explored, sizeof(article), ArticleFree, 10); Welcome(kWelcomeTextFile); ReadStopwords(&allData.stopwords, stopwordFilename); BuildIndices((argc == 1) ? kDefaultFeedsFile : argv[1], &allData ); int hcount = HashSetCount(&allData.indices); printf("hcount: %d\n", hcount); printf("Finished BuildIndices\n"); QueryIndices(&allData); return 0; }
/* Reads thesaurus records from the tokenizer and enters one thesaurusEntry
   per headword into the hashset.  Each record is a headword followed by
   comma-separated synonyms.
   NOTE(review): assumes the tokenizer hands back delimiter characters
   (e.g. ',') as their own single-char tokens -- confirm how st was
   configured by the caller. */
static void TokenizeAndBuildThesaurus(hashset *thesaurus, streamtokenizer *st)
{
    char token[2048];

    printf("Loading thesaurus. Be patient! ");
    fflush(stdout);

    while (STNextToken(st, token, sizeof(token))) {
        thesaurusEntry record;
        record.word = strdup(token);
        VectorNew(&record.synonyms, sizeof(char *), StringFree, 4);

        /* Consume "<comma> <synonym>" pairs until a non-comma token marks
           the end of this record.  That terminating token is discarded. */
        while (STNextToken(st, token, sizeof(token)) && token[0] == ',') {
            STNextToken(st, token, sizeof(token));
            char *copy = strdup(token);
            VectorAppend(&record.synonyms, &copy);
        }

        HashSetEnter(thesaurus, &record);

        /* Progress dot every 1000 entries so long loads look alive. */
        if (HashSetCount(thesaurus) % 1000 == 0) {
            printf(".");
            fflush(stdout);
        }
    }

    printf(" [All done!]\n");
    fflush(stdout);
}
/* Reads one stopword per token from the tokenizer and enters a heap copy of
   each into the stopword set, then reports the final count.
   NOTE(review): the set presumably frees the strdup'd copies via the free
   function registered at HashSetNew time -- verify against the caller. */
static void TokenizeAndBuildStopwords(hashset *stopwords, streamtokenizer *tokenMaker)
{
    char word[2048];

    printf("loading Stopwords...\n");
    while (STNextToken(tokenMaker, word, sizeof(word))) {
        const char *heapCopy = strdup(word);
        HashSetEnter(stopwords, &heapCopy);
    }
    printf("loaded %d words\n", HashSetCount(stopwords));
}