/*
 * ScanArticle
 * -----------
 * Reads every token from the tokenizer `st`.  A "<" token hands control to
 * SkipIrrelevantContent; any other token is cleaned with
 * RemoveEscapeCharacters and, when it is well formed and not present in
 * data->stopWords, is passed to addWordRecord / indexWord together with the
 * article obtained from AddArticle.
 *
 * `userData` must point to an rssFeedData.  When the stream is exhausted the
 * function prints how many words were indexed and the longest one seen.
 */
static void ScanArticle(streamtokenizer *st, void* userData)
{
  rssFeedData *feed = userData;
  articleData *currentArticle = AddArticle(&feed->articles, &feed->item);

  char token[1024];
  char longest[1024] = {'\0'};
  size_t longestLen = 0;   /* always equals strlen(longest) */
  int indexedCount = 0;

  while (STNextToken(st, token, sizeof(token))) {
    /* &token is a pointer-to-array, not a char **, so the lookup key goes
       through a plain char * alias.  Presumably the stop-word set stores
       char * elements -- confirm against its construction. */
    char *lookupKey = token;
    indexData *record;
    size_t tokenLen;

    if (strcasecmp(token, "<") == 0) {
      SkipIrrelevantContent(st); /* see html-utils.h */
      continue;
    }

    RemoveEscapeCharacters(token);
    if (!WordIsWellFormed(token))
      continue;

    if (HashSetLookup(&feed->stopWords, &lookupKey) != NULL)
      continue; /* stop word: not indexed, not counted */

    record = addWordRecord(&feed->indices, token);
    indexWord(&record->data, currentArticle);
    indexedCount++;

    /* Strictly longer only, so the first word of a given length wins. */
    tokenLen = strlen(token);
    if (tokenLen > longestLen) {
      strcpy(longest, token);
      longestLen = tokenLen;
    }
  }

  printf("\tWe counted %d well-formed words [including duplicates].\n", indexedCount);
  printf("\tThe longest word scanned was \"%s\".", longest);
  if (longestLen >= 15 && strchr(longest, '-') == NULL)
    printf(" [Ooooo... long word!]");
  printf("\n");
}
Example #2
0
/*
 * ScanArticle
 * -----------
 * Consumes the token stream `st` for one article.  Tokens equal to "<" are
 * delegated to SkipIrrelevantContent.  Every other token has its escape
 * characters removed and, if it is well formed and absent from
 * data->stopWords, is handed to addWordRecord and indexWord along with the
 * article returned by addArticle for data->rssItem.
 *
 * After the last token, a two-line summary (word count and longest word) is
 * written to stdout.
 */
static void ScanArticle(streamtokenizer *st, rssFeedData *data)
{
  articleData *thisArticle = addArticle(&data->articles, &data->rssItem);

  int tally = 0;
  char tok[1024];
  char champion[1024] = {'\0'};   /* longest indexed word so far */

  while (STNextToken(st, tok, sizeof(tok))) {
    char *probe;
    indexData *slot;

    if (strcasecmp(tok, "<") == 0) {
      SkipIrrelevantContent(st); /* see html-utils.h */
      continue;
    }

    RemoveEscapeCharacters(tok);
    if (!WordIsWellFormed(tok))
      continue;

    /* The lookup takes the address of a char *; &tok would have the wrong
       type (pointer to array), hence the alias. */
    probe = tok;
    if (HashSetLookup(&(data->stopWords), &probe) != NULL)
      continue; /* word is in the stop list: skip it */

    slot = addWordRecord(&data->indices, tok);
    indexWord(&slot->counters, thisArticle);
    tally++;

    if (strlen(tok) > strlen(champion))
      strcpy(champion, tok);
  }

  printf("\tWe counted %d well-formed words [including duplicates].\n", tally);
  printf("\tThe longest word scanned was \"%s\".", champion);
  if (strlen(champion) >= 15 && strchr(champion, '-') == NULL)
    printf(" [Ooooo... long word!]");
  printf("\n");
}