Пример #1
0
/**
 * Function: LoadStopWords
 * -----------------------
 * Opens a connection to stopWordsURL, tokenizes the response using
 * kNewLineDelimiters, and enters a heap-allocated copy of every token
 * into the supplied hashset.  When the server answers with a 3xx status
 * the function recurses on the redirect target reported by the connection
 * instead of reading the body.
 *
 * The hashset is initialized here, on the non-redirect path only, so it is
 * set up exactly once at the end of a redirect chain.  StringFree is handed
 * to HashSetNew (presumably as the element-free callback), so the copies
 * made below are not freed by this function.
 *
 * @param stopWords    hashset to initialize and fill with char * elements.
 * @param stopWordsURL URL of the stop-word list.
 */
static void LoadStopWords(hashset *stopWords, const char *stopWordsURL)
{
  url u;
  urlconnection urlconn;

  URLNewAbsolute(&u, stopWordsURL);
  URLConnectionNew(&urlconn, &u);

  if (urlconn.responseCode / 100 == 3) {
    // Redirect: the body is not read, follow the new location instead.
    LoadStopWords(stopWords, urlconn.newUrl);
  } else {
    streamtokenizer st;
    char buffer[4096];
    HashSetNew(stopWords, sizeof(char *), kNumStopWordsBuckets, StringHash, StringCompare, StringFree);
    STNew(&st, urlconn.dataStream, kNewLineDelimiters, true);
    while (STNextToken(&st, buffer, sizeof(buffer))) {
      // buffer is reused on every iteration, so the set needs its own copy.
      char *stopWord = strdup(buffer);
      if (stopWord == NULL) {
        // Out of memory: never store a NULL string in the set, since the
        // hash/compare/free callbacks would dereference it.  Stop loading
        // and keep whatever was entered so far.
        break;
      }
      HashSetEnter(stopWords, &stopWord);
    }
    STDispose(&st);
  }

  // Both are released on every path, including after a followed redirect.
  URLConnectionDispose(&urlconn);
  URLDispose(&u);
}
Пример #2
0
/**
 * Function: main
 * --------------
 * Entry point.  Picks the feeds file (argv[1] when an argument is given,
 * kDefaultFeedsFile otherwise), prepares the thread data, prints the
 * welcome text, loads the stop words, builds the indices, cleans up the
 * thread data, and finally runs the query loop.
 *
 * @param argc number of command-line tokens.
 * @param argv command-line tokens; argv[1], when present, names the feeds file.
 * @return 0 (see the note on pthread_exit below).
 */
int main(int argc, char **argv)
{
  rssDatabase database;
  const char *feedsPath = kDefaultFeedsFile;

  if (argc != 1) {
    feedsPath = argv[1];
  }

  initThreadsData(&database);
  Welcome(kWelcomeTextFile);
  LoadStopWords(&database.stopWords, kDefaultStopWordsFile);
  BuildIndices(&database, feedsPath);

  cleanThreadData(&database);

  QueryIndices(&database);

  // pthread_exit() does not return; the statement after it only satisfies
  // main's int return type.
  pthread_exit(NULL);
  return 0;
}
Пример #3
0
/**
 * Function: main
 * --------------
 * Entry point.  Uses argv[1] as the feeds file URL when an argument is
 * supplied and kDefaultFeedsFileURL otherwise, then runs the program's
 * phases in order: welcome text, data-structure creation, stop-word
 * loading, index building, querying, and disposal.
 *
 * @param argc number of command-line tokens.
 * @param argv command-line tokens; argv[1], when present, is the feeds file URL.
 * @return always 0 when main returns normally.
 */
int main(int argc, char **argv)
{
  const char *feedsFileURL = kDefaultFeedsFileURL;
  rssFeedData feedData;

  if (argc != 1) {
    feedsFileURL = argv[1];
  }

  Welcome(kWelcomeTextURL);

  // The data structure is created before anything is loaded into it.
  CreateDataStructure(&feedData);
  LoadStopWords(kDefaultStopWordsURL, &feedData);
  BuildIndices(feedsFileURL, &feedData);

  QueryIndices(&feedData);

  DisposeDataStructure(&feedData);
  return 0;
}
Пример #4
0
/**
 * Function: main
 * --------------
 * Serves as the entry point of the full RSS News Feed Aggregator.
 *
 * @param argc the number of tokens making up the shell command invoking the
 *             application.  It should be either 1 or 2--2 when the user wants
 *             to specify which flat text file should be used to source all of
 *             the RSS feeds.
 * @param argv the array of one or more tokens making up the command line
 *             invoking the application.  The 0th token is ignored, and the 1st
 *             one, if present, is taken to be the path identifying where the
 *             list of RSS feeds is.
 * @return always 0 if main returns normally (although there might be exit(n)
 *         calls within the code base that end the program abnormally)
 */
int main(int argc, char **argv)
{
  const char *feedsFileURL = kDefaultFeedsFileURL;
  rssFeedData feedData;

  // Any invocation other than the bare program name supplies the feeds list.
  if (argc != 1) {
    feedsFileURL = argv[1];
  }

  Welcome(kWelcomeTextURL);

  CreateDataStructure(&feedData);
  LoadStopWords(kDefaultStopWordsURL, &feedData.stopWords);
  BuildIndices(feedsFileURL, &feedData);

  QueryIndices(&feedData);

  DisposeData(&feedData);
  return 0;
}
Пример #5
0
/**
 * Function: LoadStopWords
 * -----------------------
 * Opens a connection to stopWordsURL, tokenizes the response using
 * kNewLineDelimiters, and enters a heap-allocated copy of every token
 * into dataPtr->stopWords.  When the server answers with a 3xx status the
 * function recurses on the redirect target reported by the connection
 * instead of reading the body.
 *
 * This function does not initialize dataPtr->stopWords; presumably the
 * caller has already set it up -- confirm against the call site.
 *
 * @param stopWordsURL URL of the stop-word list.
 * @param dataPtr      aggregate whose stopWords hashset receives the
 *                     char * elements.
 */
static void LoadStopWords(const char *stopWordsURL, rssFeedData *dataPtr) {
	url u;
	urlconnection urlconn;

	URLNewAbsolute(&u, stopWordsURL);
	URLConnectionNew(&urlconn, &u);

	if (urlconn.responseCode / 100 == 3) {
		// Redirect: the body is not read, follow the new location instead.
		LoadStopWords(urlconn.newUrl, dataPtr);
	} else {
		streamtokenizer st;
		char buffer[4096];
		STNew(&st, urlconn.dataStream, kNewLineDelimiters, true);
		while (STNextToken(&st, buffer, sizeof(buffer))) {
			// buffer is reused on every iteration, so the set needs its own copy.
			char *s = strdup(buffer);
			if (s == NULL) {
				// Out of memory: never store a NULL string in the set.
				// Stop loading and keep whatever was entered so far.
				break;
			}
			HashSetEnter(&(dataPtr->stopWords), &s);
		}
		// NOTE(review): a blank line is printed after loading; it looks like a
		// carry-over from a routine that echoes the stream -- confirm it is wanted.
		printf("\n");
		STDispose(&st);
	}

	// Both are released on every path, including after a followed redirect.
	URLConnectionDispose(&urlconn);
	URLDispose(&u);
}