コード例 #1
0
ファイル: rss-news-search.c プロジェクト: mariolew/cs107
/**
 * Function: ScanArticle
 * ---------------------
 * Pulls tokens from the supplied tokenizer until STNextToken reports that
 * none are left.  A "<" token hands control to SkipIrrelevantContent; every
 * other token has its escape characters removed and, when it is well formed
 * and absent from stopWords, is handed to wordCountEnter together with the
 * article's URL and title.
 *
 * @param st            tokenizer positioned over the article text
 * @param articleTitle  title forwarded to wordCountEnter
 * @param unused        ignored; kept so the signature stays unchanged
 * @param articleURL    URL forwarded to wordCountEnter
 * @param stopWords     set consulted to filter out words that are not indexed
 * @param wordCounts    set updated through wordCountEnter
 */
static void ScanArticle(streamtokenizer *st, const char *articleTitle, const char *unused, const char *articleURL,
                        hashset *stopWords, hashset *wordCounts)
{
  char word[1024];

  (void)unused;

  while (STNextToken(st, word, sizeof(word))) {
    if (strcasecmp(word, "<") == 0) {
      SkipIrrelevantContent(st); // in html-utls.h
      continue;
    }

    RemoveEscapeCharacters(word);
    if (!WordIsWellFormed(word)) {
      continue; // malformed tokens are never indexed, so skip the stop-word probe
    }

    // The lookup is given the address of a char *; &word would instead be
    // the address of the whole array, hence the alias.
    char *key = word;
    if (HashSetLookup(stopWords, &key) == NULL) {
      wordCountEnter(wordCounts, word, articleURL, articleTitle);
    }
  }
}
コード例 #2
0
/**
 * Function: ScanArticle
 * ---------------------
 * Registers the current feed item as an article via AddArticle, then walks
 * the token stream.  "<" tokens are delegated to SkipIrrelevantContent; all
 * other tokens are unescaped and, if well formed and not found in the
 * stop-word set, recorded through addWordRecord / indexWord against the
 * article.  Finishes by printing how many words were indexed and the longest
 * of them.
 *
 * @param st        tokenizer positioned over the article text
 * @param userData  pointer to the rssFeedData holding articles, item,
 *                  stopWords and indices
 */
static void ScanArticle(streamtokenizer *st, void* userData)
{
  rssFeedData *feed = userData;
  articleData *current = AddArticle(&feed->articles, &feed->item);

  char token[1024];
  char longest[1024] = {'\0'};
  size_t longestLen = 0;   // always equals strlen(longest)
  int indexed = 0;

  while (STNextToken(st, token, sizeof(token))) {
    if (strcasecmp(token, "<") == 0) {
      SkipIrrelevantContent(st); // in html-utls.h
      continue;
    }

    RemoveEscapeCharacters(token);
    if (!WordIsWellFormed(token)) {
      continue;
    }

    // The lookup wants the address of a char *, which &token (an array) is not.
    char *key = token;
    if (HashSetLookup(&feed->stopWords, &key) != NULL) {
      continue; // stop word: not indexed, not counted
    }

    indexData *entry = addWordRecord(&feed->indices, token);
    indexWord(&entry->data, current);
    indexed++;

    size_t tokenLen = strlen(token);
    if (tokenLen > longestLen) {
      strcpy(longest, token);
      longestLen = tokenLen;
    }
  }

  printf("\tWe counted %d well-formed words [including duplicates].\n", indexed);
  printf("\tThe longest word scanned was \"%s\".", longest);
  if (longestLen >= 15 && strchr(longest, '-') == NULL) {
    printf(" [Ooooo... long word!]");
  }
  printf("\n");
}
コード例 #3
0
/**
 * Function: ProcessResponse
 * -------------------------
 * Answers a single search query.  Malformed words and stop words are
 * rejected with a message; any other word is looked up in the index set and
 * the lookup result is passed to printResult.
 *
 * @param askedWord  the word typed by the user
 * @param userData   pointer to the rssFeedData holding stopWords and indices
 */
static void ProcessResponse(const char *askedWord,void* userData)
{
  if (!WordIsWellFormed(askedWord)) {
    printf("\tWe won't be allowing words like \"%s\" into our set of indices.\n", askedWord);
    return;
  }

  rssFeedData *data = userData;
  if (HashSetLookup(&data->stopWords, &askedWord) != NULL) {
    // The previous text read "...more specificn %s \n" (a mangled newline escape).
    printf("\t\"%s\" is too common a word to be taken seriously. Try something more specific.\n", askedWord);
    return;
  }

  // NOTE(review): resultData is NULL when the word is not indexed; printResult
  // is presumably responsible for that case -- confirm against its definition.
  indexData *resultData = HashSetLookup(&data->indices, &askedWord);
  printResult(resultData, askedWord);
}
コード例 #4
0
/**
 * Function: ProcessResponse
 * -------------------------
 * Screens a query word before it is searched for: malformed words and words
 * found in stopWords each get an explanatory message, and everything else is
 * forwarded to ProcessValidResponse.
 */
static void ProcessResponse(const char *word, hashset *stopWords,
			    hashset *wordHash, hashset *articlesSeen)
{
  if (!WordIsWellFormed(word)) {
    printf("We won't be allowing words like \"%s\" into our set of indices.\n", word);
    return;
  }

  const int isStopWord = (HashSetLookup(stopWords, &word) != NULL);
  if (isStopWord) {
    printf("This is too common a word. Please be more specific.\n");
    return;
  }

  ProcessValidResponse(word, stopWords, wordHash, articlesSeen);
}
コード例 #5
0
ファイル: rss-news-search.c プロジェクト: azavadil/CS107_PA
/**
 * Function: ScanArticle
 * ---------------------
 * Tokenizes one article and records every well-formed word that passes
 * WordNotInStopwords in allData->indices.  A word seen for the first time
 * gets a fresh index entry (a heap copy of the word plus an empty vector of
 * wordcountEntry); UpdateIndices is then called on the entry's article
 * vector with articleIndex.  Finishes by printing the number of well-formed
 * words (stop words included) and the longest one.
 *
 * @param st            tokenizer positioned over the article text
 * @param a             not referenced by this function
 * @param articleIndex  value passed through to UpdateIndices
 * @param allData       holds the stopwords and indices sets
 */
static void ScanArticle(streamtokenizer *st, article *a, int articleIndex, rssData *allData )
{
  int numWords = 0;
  char word[1024];
  char longestWord[1024] = {'\0'};

  while (STNextToken(st, word, sizeof(word))) {
    if (strcasecmp(word, "<") == 0) {
      SkipIrrelevantContent(st); // in html-utls.h
      continue;
    }

    RemoveEscapeCharacters(word);
    if (!WordIsWellFormed(word)) {
      continue;
    }
    numWords++;

    if (WordNotInStopwords(&allData->stopwords, word)) {
      // Probe with a key that borrows the stack buffer; it is only replaced by
      // a heap copy if the entry actually has to be stored.
      indexEntry entry = {word};
      indexEntry *found = HashSetLookup(&allData->indices, &entry);

      if (found == NULL) {
        entry.word = strdup(word);
        if (entry.word == NULL) {
          // Entering a NULL key would poison the set; drop this occurrence instead.
          fprintf(stderr, "\tOut of memory while indexing \"%s\"; word skipped.\n", word);
        } else {
          VectorNew(&entry.articles, sizeof(wordcountEntry), NULL, 10);
          HashSetEnter(&allData->indices, &entry);
          // Look the entry up again so that `found` refers to whatever the
          // set now holds rather than to the local `entry`.
          found = HashSetLookup(&allData->indices, &entry);
        }
      }

      if (found != NULL) {
        UpdateIndices(&found->articles, articleIndex);
      }
    }

    if (strlen(word) > strlen(longestWord)) {
      strcpy(longestWord, word);
    }
  }

  printf("\tWe counted %d well-formed words [including duplicates].\n", numWords);
  printf("\tThe longest word scanned was \"%s\".", longestWord);
  if (strlen(longestWord) >= 15 && (strchr(longestWord, '-') == NULL)) {
    printf(" [Ooooo... long word!]");
  }
  printf("\n");
}
コード例 #6
0
/**
 * Function: ScanArticle
 * ---------------------
 * Drains the tokenizer.  "<" tokens are handed to SkipIrrelevantContent;
 * every other token is unescaped and, if well formed, passed on to
 * ProcessWellFormedWord along with the article and the three hashsets.
 */
static void ScanArticle(streamtokenizer *st, article* a,
			hashset* stopWords, hashset* wordHash,
			hashset *articlesSeen)
{
  char token[1024];

  for (;;) {
    if (!STNextToken(st, token, sizeof(token))) {
      break;
    }

    if (strcasecmp(token, "<") == 0) {
      SkipIrrelevantContent(st); // in html-utls.h
      continue;
    }

    RemoveEscapeCharacters(token);
    if (WordIsWellFormed(token)) {
      ProcessWellFormedWord(token, a, stopWords, wordHash, articlesSeen);
    }
  }
}
コード例 #7
0
ファイル: rss-news-search.c プロジェクト: azavadil/CS107_PA
/**
 * Function: ProcessResponse
 * -------------------------
 * Handles one query word.  A malformed word or a word missing from
 * allData->indices produces a message; otherwise the matching entry's
 * article vector is sorted with ReverseWordcountCmp and each element is
 * passed to PrintArticle with &allData->explored as auxiliary data.
 */
static void ProcessResponse(const char *word, rssData *allData)
{
  if (!WordIsWellFormed(word)) {
    printf("\tWe won't be allowing words like \"%s\" into our set of indices.\n", word);
    return;
  }

  // NOTE(review): the lookup key is the address of a bare char *, not of an
  // indexEntry; presumably the word is the entry's first field -- confirm.
  indexEntry *match = HashSetLookup(&allData->indices, &word);
  if (match == NULL) {
    printf("\tWord not found in our indices\n");
    return;
  }

  VectorSort(&match->articles, ReverseWordcountCmp);
  VectorMap(&match->articles, PrintArticle, &allData->explored);
}
コード例 #8
0
ファイル: rss-news-search.c プロジェクト: mariolew/cs107
/**
 * Function: ProcessResponse
 * -------------------------
 * Responds to one query word: rejects malformed words and stop words with a
 * message, otherwise asks wordCountLookup for the word's vector and either
 * prints it via printArticles or reports that nothing was found.
 */
static void ProcessResponse(const char *word, hashset *stopWords, hashset *wordCounts)
{
  if (!WordIsWellFormed(word)) {
    printf("\tWe won't be allowing words like \"%s\" into our set of indices.\n", word);
    return;
  }

  if (HashSetLookup(stopWords, &word) != NULL) {
    printf("\tToo common a word to be taken seriously.  Try something more specific.\n");
    return;
  }

  vector *matches = wordCountLookup(wordCounts, word);
  if (matches == NULL) {
    printf("\tNone of today's news contain the word \"%s\". \n", word);
    return;
  }

  printArticles(matches, word);
}
コード例 #9
0
ファイル: rss-news-search.c プロジェクト: ej2xu/cs107
/**
 * Function: ProcessResponse
 * -------------------------
 * Handles one search term against the database.  Malformed terms, stop
 * words, and terms with no index entry each get their own message; a term
 * that has an entry is passed to ListTopArticles together with the set of
 * previously seen articles.
 */
static void ProcessResponse(rssDatabase *db, const char *word)
{
  if (!WordIsWellFormed(word)) {
    printf("That search term couldn't possibly be in our set of indices.\n\n");
  } else if (HashSetLookup(&db->stopWords, &word) != NULL) {
    printf("\"%s\" is too common a word to be taken seriously.  Please be more specific.\n\n", word);
  } else {
    // Stack-built probe carrying only the word, used as the lookup key.
    rssIndexEntry probe = { word };
    rssIndexEntry *hit = HashSetLookup(&db->indices, &probe);

    if (hit != NULL) {
      ListTopArticles(hit, &db->previouslySeenArticles);
    } else {
      printf("None of today's news articles contain the word \"%s\".\n\n", word);
    }
  }
}
コード例 #10
0
ファイル: rss-news-search.c プロジェクト: siumai/107
/**
 * Function: ProcessResponse
 * -------------------------
 * Answers one query word.  Malformed words, stop words, and words with no
 * index entry each produce a message.  For an indexed word the number of
 * records is printed, the entry's counters vector is sorted with
 * SortVectorCmpFn, and every element is passed to PrintResultMapFn.
 *
 * @param word      the word typed by the user
 * @param userData  pointer to the rssFeedData holding stopWords and indices
 */
static void ProcessResponse(const char *word, void *userData)
{
  if (!WordIsWellFormed(word)) {
    printf("\tWe won't be allowing words like \"%s\" into our set of indices.\n", word);
    return;
  }

  rssFeedData *data = userData;
  if (HashSetLookup(&data->stopWords, &word) != NULL) {
    // Previously a stop word produced no output at all, leaving the user
    // without any feedback for the query.
    printf("\tToo common a word to be taken seriously. Try something more specific.\n");
    return;
  }

  indexData *resultData = HashSetLookup(&data->indices, &word);
  if (resultData == NULL) {
    printf("\tWe don't have records about %s into our set of indices.\n", word);
    return;
  }

  // Operate on the stored vector directly rather than on a by-value copy of
  // the struct, so nothing depends on how a copied vector behaves.
  vector *results = &resultData->counters;
  printf("there are %d records of this word", VectorLength(results));
  VectorSort(results, SortVectorCmpFn);

  int i = 1; // auxiliary data handed to PrintResultMapFn (presumably a running rank -- confirm)
  VectorMap(results, PrintResultMapFn, &i);
  printf("\n");
}
コード例 #11
0
ファイル: rss-news-search.c プロジェクト: siumai/107
/**
 * Function: ScanArticle
 * ---------------------
 * Registers the feed's current rssItem as an article via addArticle, then
 * consumes the token stream.  "<" tokens go to SkipIrrelevantContent; other
 * tokens are unescaped and, when well formed and absent from the stop-word
 * set, recorded with addWordRecord / indexWord against that article.  Ends
 * by printing the count of indexed words and the longest one.
 */
static void ScanArticle(streamtokenizer *st, rssFeedData *data)
{
  articleData *current = addArticle(&data->articles, &data->rssItem);

  char token[1024];
  char longest[1024] = {'\0'};
  size_t longestLen = 0;   // always equals strlen(longest)
  int indexed = 0;

  while (STNextToken(st, token, sizeof(token))) {
    if (strcasecmp(token, "<") == 0) {
      SkipIrrelevantContent(st); // in html-utls.h
      continue;
    }

    RemoveEscapeCharacters(token);
    if (!WordIsWellFormed(token)) {
      continue;
    }

    // The lookup is given the address of a char *, so alias the array first.
    char *key = token;
    if (HashSetLookup(&(data->stopWords), &key) != NULL) {
      continue; // on the stop list: neither indexed nor counted
    }

    indexData *entry = addWordRecord(&data->indices, token);
    indexWord(&entry->counters, current);
    indexed++;

    size_t tokenLen = strlen(token);
    if (tokenLen > longestLen) {
      strcpy(longest, token);
      longestLen = tokenLen;
    }
  }

  printf("\tWe counted %d well-formed words [including duplicates].\n", indexed);
  printf("\tThe longest word scanned was \"%s\".", longest);
  if (longestLen >= 15 && strchr(longest, '-') == NULL) {
    printf(" [Ooooo... long word!]");
  }
  printf("\n");
}
コード例 #12
0
ファイル: rss-news-search.c プロジェクト: ej2xu/cs107
/**
 * Function: WordIsWorthIndexing
 * -----------------------------
 * Returns true only when the word is well formed and a lookup in stopWords
 * finds nothing.  The stop-word set is not consulted for malformed words.
 */
static bool WordIsWorthIndexing(const char *word, hashset *stopWords)
{
  if (!WordIsWellFormed(word)) {
    return false;
  }

  return HashSetLookup(stopWords, &word) == NULL;
}