Пример #1
0
/**
 * Program entry point.
 *
 * Creates the three containers held in an rssData record (a stopword set,
 * an index set and a vector of explored articles), prints the welcome text,
 * loads the stopword list, builds the indices from a feeds file and then
 * hands control to the query loop.
 *
 * The feeds file is argv[1], except when argc is exactly 1, in which case
 * kDefaultFeedsFile is used.
 *
 * Always returns 0.
 */
int main(int argc, char **argv)
{
  static const char *stopwordsPath = "/home/compilers/media/assn-4-rss-news-search-data/stop-words.txt";
  static const int kStopwordBuckets = 1009;
  static const int kIndexNumBuckets = 10007;

  rssData allData;

  // Stopword set: elements are char * values.
  HashSetNew(&allData.stopwords, sizeof(char*), kStopwordBuckets, StringHash, StringCmp, StringFree);

  // Index set: elements are indexEntry records.
  HashSetNew(&allData.indices, sizeof(indexEntry), kIndexNumBuckets, IndexHash, IndexCmp, IndexFree);

  // Vector of article records (final argument 10 is presumably the initial
  // allocation hint -- confirm against the vector API).
  VectorNew(&allData.explored, sizeof(article), ArticleFree, 10);

  Welcome(kWelcomeTextFile);
  ReadStopwords(&allData.stopwords, stopwordsPath);

  // Start from the default feeds file; any argc other than 1 selects argv[1].
  const char *feedsFile = kDefaultFeedsFile;
  if (argc != 1) {
    feedsFile = argv[1];
  }
  BuildIndices(feedsFile, &allData);

  // Report how many entries ended up in the index set.
  const int indexEntryCount = HashSetCount(&allData.indices);
  printf("hcount: %d\n", indexEntryCount);

  printf("Finished BuildIndices\n");
  QueryIndices(&allData);
  return 0;
}
/**
 * Reads tokens from `st` and fills `thesaurus` with thesaurusEntry records.
 *
 * Parsing, as implemented below:
 *   - one token is read and duplicated as the entry's word;
 *   - while the next token begins with ',', the token after it is duplicated
 *     and appended to the entry's synonym vector;
 *   - the first token that does not begin with ',' ends the entry and is
 *     discarded (presumably a line separator -- confirm against how the
 *     tokenizer is configured by the caller).
 *
 * A '.' is printed whenever the set's count is a multiple of 1000 after an
 * entry has been entered, as a progress indicator.
 *
 * Robustness:
 *   - if the stream ends right after a ',', no synonym is added (the stale
 *     buffer is not duplicated);
 *   - if strdup fails, loading stops early and a message is written to
 *     stderr; NULL strings are never stored in the containers.
 *
 * @param thesaurus  set receiving thesaurusEntry elements; each entered
 *                   entry's word and synonym strings are heap-allocated here.
 * @param st         tokenizer positioned at the start of the thesaurus data.
 */
static void TokenizeAndBuildThesaurus(hashset *thesaurus, streamtokenizer *st)
{
  printf("Loading thesaurus. Be patient! ");
  fflush(stdout);

  char buffer[2048];
  int outOfMemory = 0;
  while (!outOfMemory && STNextToken(st, buffer, sizeof(buffer))) {
    thesaurusEntry entry;
    entry.word = strdup(buffer);
    if (entry.word == NULL) {
      // Nothing has been allocated for this entry yet, so just stop.
      outOfMemory = 1;
      break;
    }
    VectorNew(&entry.synonyms, sizeof(char *), StringFree, 4);

    while (STNextToken(st, buffer, sizeof(buffer)) && (buffer[0] == ',')) {
      // A trailing ',' with nothing after it: do not re-use the stale buffer.
      if (!STNextToken(st, buffer, sizeof(buffer))) {
        break;
      }
      char *synonym = strdup(buffer);
      if (synonym == NULL) {
        // Keep the synonyms gathered so far, then stop after this entry.
        outOfMemory = 1;
        break;
      }
      VectorAppend(&entry.synonyms, &synonym);
    }

    HashSetEnter(thesaurus, &entry);
    if (HashSetCount(thesaurus) % 1000 == 0) {
      printf(".");
      fflush(stdout);
    }
  }

  if (outOfMemory) {
    fprintf(stderr, "\nOut of memory while loading thesaurus; stopped early.\n");
  }

  printf(" [All done!]\n");
  fflush(stdout);
}
Пример #3
0
/**
 * Reads every token from `tokenMaker` and enters a heap-allocated copy of it
 * into `stopwords`, then prints how many elements the set holds.
 *
 * The set receives the address of a char * variable, so it is expected to be
 * configured for char * elements (TODO confirm against the HashSetNew call
 * that created it).
 *
 * If strdup fails, loading stops early and a message is written to stderr;
 * a NULL pointer is never entered into the set.
 *
 * @param stopwords   set receiving one char * per token read.
 * @param tokenMaker  tokenizer positioned at the start of the stopword data.
 */
static void TokenizeAndBuildStopwords(hashset *stopwords, streamtokenizer *tokenMaker)
{
  printf("loading Stopwords...\n");

  char buffer[2048];
  while (STNextToken(tokenMaker, buffer, sizeof(buffer))) {
    char *word = strdup(buffer);
    if (word == NULL) {
      fprintf(stderr, "Out of memory while loading stopwords; stopped early.\n");
      break;
    }
    // The set copies the pointer value itself, so pass the pointer's address.
    HashSetEnter(stopwords, &word);
  }
  printf("loaded %d words\n", HashSetCount(stopwords));
}