/*
 * ScanArticle
 * -----------
 * Pulls tokens from `st` one at a time and indexes every well-formed
 * word that is not found in the feed's stop-word set.
 *
 *   st       - tokenizer positioned over the article text.
 *   userData - opaque pointer that is used as an rssFeedData *.
 *
 * A token equal to "<" hands control to SkipIrrelevantContent.  Every
 * other token is passed through RemoveEscapeCharacters and
 * WordIsWellFormed before the stop-word lookup.  Each word that survives
 * is recorded via addWordRecord/indexWord against the article returned
 * by AddArticle.
 *
 * On completion a short summary is written to stdout: the number of
 * indexed words and the longest one seen.  Note that the counter only
 * advances for words that were actually indexed, so stop words are not
 * part of the printed total.
 */
static void ScanArticle(streamtokenizer *st, void *userData)
{
  rssFeedData *feed = userData;
  articleData *article = AddArticle(&feed->articles, &feed->item);

  int wordCount = 0;
  char token[1024];
  char longest[1024] = {'\0'};
  size_t longestLen = 0; /* always equals strlen(longest) */

  while (STNextToken(st, token, sizeof(token))) {
    if (strcasecmp(token, "<") == 0) {
      SkipIrrelevantContent(st); // in html-utls.h
      continue;
    }

    RemoveEscapeCharacters(token);
    if (!WordIsWellFormed(token)) {
      continue;
    }

    /*
     * The lookup is given the address of a char *.  `&token` would not
     * do, because it has type char (*)[1024], so a separate pointer
     * variable is needed.
     */
    char *key = token;
    if (HashSetLookup(&feed->stopWords, &key) != NULL) {
      continue; /* stop word: do not index */
    }

    indexData *entry = addWordRecord(&feed->indices, token);
    indexWord(&entry->data, article);
    wordCount++;

    size_t tokenLen = strlen(token);
    if (tokenLen > longestLen) {
      strcpy(longest, token);
      longestLen = tokenLen;
    }
  }

  printf("\tWe counted %d well-formed words [including duplicates].\n", wordCount);
  printf("\tThe longest word scanned was \"%s\".", longest);

  /* Only long words without a hyphen earn the extra remark. */
  int hasHyphen = (strchr(longest, '-') != NULL);
  if (longestLen >= 15 && !hasHyphen) {
    printf(" [Ooooo... long word!]");
  }
  printf("\n");
}
/*
 * ScanArticle
 * -----------
 * Reads the article behind `st` token by token and indexes each
 * well-formed word that does not appear in data->stopWords.
 *
 *   st   - tokenizer positioned over the article text.
 *   data - feed state; this function uses its `articles`, `rssItem`,
 *          `stopWords` and `indices` members.
 *
 * The article record comes from addArticle.  A "<" token is delegated to
 * SkipIrrelevantContent.  Any other token is cleaned with
 * RemoveEscapeCharacters, checked with WordIsWellFormed, looked up in
 * the stop-word set, and if absent there, stored through
 * addWordRecord/indexWord.
 *
 * Finally a two-line summary goes to stdout with the number of indexed
 * words and the longest of them.  Stop words never reach the counter.
 */
static void ScanArticle(streamtokenizer *st, rssFeedData *data)
{
  articleData *currentArticle = addArticle(&data->articles, &data->rssItem);
  char longest[1024] = {'\0'};
  char buffer[1024];
  int indexedCount = 0;

  while (STNextToken(st, buffer, sizeof(buffer))) {
    if (strcasecmp(buffer, "<") == 0) {
      SkipIrrelevantContent(st); // in html-utls.h
    } else {
      RemoveEscapeCharacters(buffer);

      /*
       * The lookup takes the address of a char *, hence the extra
       * pointer variable.  && short-circuits, so the stop-word set is
       * only consulted for well-formed words.
       */
      char *lookupKey = buffer;
      if (WordIsWellFormed(buffer) &&
          HashSetLookup(&data->stopWords, &lookupKey) == NULL) {
        /* Not in the stop list: index the word. */
        indexData *record = addWordRecord(&data->indices, buffer);
        indexWord(&record->counters, currentArticle);
        indexedCount += 1;

        if (strlen(longest) < strlen(buffer)) {
          strcpy(longest, buffer);
        }
      }
    }
  }

  printf("\tWe counted %d well-formed words [including duplicates].\n", indexedCount);
  printf("\tThe longest word scanned was \"%s\".", longest);

  /* Hyphenated words are excluded from the "long word" remark. */
  if (strchr(longest, '-') == NULL) {
    if (strlen(longest) >= 15) {
      printf(" [Ooooo... long word!]");
    }
  }
  printf("\n");
}