int main(int argc, char **argv)
{
  Welcome(kWelcomeTextFile);

  hashset stopWords;
  BuildStopWordsHashset(&stopWords, kDefaultStopWordsFile);

  hashset wordHash;
  HashSetNew(&wordHash, sizeof(currWord), 10007, WordHashFn, WordCompare, WordFree);

  hashset articlesSeen;
  HashSetNew(&articlesSeen, sizeof(article), 10007, ArticleHashFn, ArticleCompare, ArticleFree);

  BuildIndices((argc == 1) ? kDefaultFeedsFile : argv[1], &stopWords, &wordHash, &articlesSeen);
  QueryIndices(&stopWords, &wordHash, &articlesSeen);

  HashSetDispose(&stopWords);
  HashSetDispose(&wordHash);
  HashSetDispose(&articlesSeen);
  return 0;
}
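/*
 * A minimal sketch of what the currWord element and its free callback above
 * might look like: each word maps to a vector of per-article occurrence
 * records, so WordFree has to release both the owned string and the nested
 * vector. The field names are assumptions; only the type name currWord and
 * the callback signature come from the snippet itself.
 */
#include <stdlib.h>

typedef struct {
  char *word;        // dynamically allocated copy of the word
  vector articles;   // per-article occurrence records for this word
} currWord;

static void WordFree(void *elemAddr)
{
  currWord *w = elemAddr;
  free(w->word);                 // release the owned string ...
  VectorDispose(&w->articles);   // ... and the nested vector of records
}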
int main(int argc, char **argv)
{
  static const char *stopwordFilename = "/home/compilers/media/assn-4-rss-news-search-data/stop-words.txt";
  static const int kStopwordBuckets = 1009;
  static const int kIndexNumBuckets = 10007;

  rssData allData;
  HashSetNew(&allData.stopwords, sizeof(char *), kStopwordBuckets, StringHash, StringCmp, StringFree);
  HashSetNew(&allData.indices, sizeof(indexEntry), kIndexNumBuckets, IndexHash, IndexCmp, IndexFree);
  // vector of articles we have already explored, so each article is indexed only once
  VectorNew(&allData.explored, sizeof(article), ArticleFree, 10);

  Welcome(kWelcomeTextFile);
  ReadStopwords(&allData.stopwords, stopwordFilename);
  BuildIndices((argc == 1) ? kDefaultFeedsFile : argv[1], &allData);

  int hcount = HashSetCount(&allData.indices);
  printf("hcount: %d\n", hcount);
  printf("Finished BuildIndices\n");
  QueryIndices(&allData);

  HashSetDispose(&allData.stopwords);   // release everything before exiting
  HashSetDispose(&allData.indices);
  VectorDispose(&allData.explored);
  return 0;
}
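/*
 * The rssData aggregate is not defined in this snippet; a plausible shape,
 * inferred from the three container initializations above:
 */
typedef struct {
  hashset stopwords;   // set of char *: words to ignore while indexing
  hashset indices;     // set of indexEntry: one entry per distinct word
  vector  explored;    // articles already processed, so none is indexed twice
} rssData;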
/**
 * Function: TestHashTable
 * -----------------------
 * Runs a test of the hashset using a frequency structure as the element
 * type. It will open a file, read each char, and count the number of
 * times each char occurs. Tests enter, lookup, and mapping for the hashset.
 * Prints contents of table to stdout. Then it dumps all the table elements
 * into a vector and sorts them by frequency of occurrence and
 * prints the array out. Note that this particular stress test passes
 * 0 as the initialAllocation, which the vector is required to handle
 * gracefully - be careful!
 */

static void TestHashTable(void)
{
  hashset counts;
  vector sortedCounts;

  HashSetNew(&counts, sizeof(struct frequency), kNumBuckets, HashFrequency, CompareLetter, NULL);
  fprintf(stdout, "\n\n ------------------------- Starting the HashTable test\n");
  BuildTableOfLetterCounts(&counts);

  fprintf(stdout, "Here are the unordered contents of the table:\n");
  HashSetMap(&counts, PrintFrequency, stdout);       // print contents of table

  VectorNew(&sortedCounts, sizeof(struct frequency), NULL, 0);
  HashSetMap(&counts, AddFrequency, &sortedCounts);  // add all freqs to array
  VectorSort(&sortedCounts, CompareLetter);          // sort by char
  fprintf(stdout, "\nHere are the trials sorted by char: \n");
  VectorMap(&sortedCounts, PrintFrequency, stdout);

  VectorSort(&sortedCounts, CompareOccurrences);     // sort by occurrences
  fprintf(stdout, "\nHere are the trials sorted by occurrence & char: \n");
  VectorMap(&sortedCounts, PrintFrequency, stdout);  // print out array

  VectorDispose(&sortedCounts);                      // free all storage
  HashSetDispose(&counts);
}
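/*
 * A sketch of the element type and callbacks the test above relies on. The
 * field names (ch, occurrences) are assumptions; the behavior (hash and
 * compare by character) follows from the doc comment and the two sort passes.
 */
struct frequency {
  char ch;             // the character being counted
  int occurrences;     // how many times it appeared in the file
};

static int HashFrequency(const void *elemAddr, int numBuckets)
{
  const struct frequency *f = elemAddr;
  return (unsigned char)f->ch % numBuckets;  // bucket by character code
}

static int CompareLetter(const void *elemAddr1, const void *elemAddr2)
{
  const struct frequency *a = elemAddr1, *b = elemAddr2;
  return a->ch - b->ch;
}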
static void BuildIndices(rssDatabase *db, const char *feedsFileURL)
{
  url u;
  urlconnection urlconn;

  URLNewAbsolute(&u, feedsFileURL);
  URLConnectionNew(&urlconn, &u);

  if (urlconn.responseCode / 100 == 3) {   // redirect: chase the new location
    BuildIndices(db, urlconn.newUrl);
  } else {
    streamtokenizer st;
    char remoteFileName[2048];
    HashSetNew(&db->indices, sizeof(rssIndexEntry), kNumIndexEntryBuckets,
               IndexEntryHash, IndexEntryCompare, IndexEntryFree);
    VectorNew(&db->previouslySeenArticles, sizeof(rssNewsArticle), NewsArticleFree, 0);
    STNew(&st, urlconn.dataStream, kNewLineDelimiters, true);
    while (STSkipUntil(&st, ":") != EOF) { // ignore everything up to the first colon of the line
      STSkipOver(&st, ": ");               // now ignore the colon and any whitespace directly after it
      STNextToken(&st, remoteFileName, sizeof(remoteFileName));
      ProcessFeed(db, remoteFileName);
    }
    printf("\n");
    STDispose(&st);
  }

  URLConnectionDispose(&urlconn);
  URLDispose(&u);
}
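/*
 * The parsing loop above expects one feed per line, with everything up to
 * the first colon serving as a human-readable label and the token after it
 * naming the feed. A hypothetical example of the format (the actual labels
 * and URLs depend on the data set):
 *
 *   World News : http://feeds.example.com/world/rss.xml
 *   Technology : http://feeds.example.com/tech/rss.xml
 */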
static void LoadStopWords(hashset *stopWords, const char *stopWordsURL)
{
  url u;
  urlconnection urlconn;

  URLNewAbsolute(&u, stopWordsURL);
  URLConnectionNew(&urlconn, &u);

  if (urlconn.responseCode / 100 == 3) {
    LoadStopWords(stopWords, urlconn.newUrl);
  } else {
    streamtokenizer st;
    char buffer[4096];
    HashSetNew(stopWords, sizeof(char *), kNumStopWordsBuckets, StringHash, StringCompare, StringFree);
    STNew(&st, urlconn.dataStream, kNewLineDelimiters, true);
    while (STNextToken(&st, buffer, sizeof(buffer))) {
      char *stopWord = strdup(buffer);
      HashSetEnter(stopWords, &stopWord);
    }
    STDispose(&st);
  }

  URLConnectionDispose(&urlconn);
  URLDispose(&u);
}
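/*
 * A minimal sketch of the string callbacks the stop-word sets in these
 * snippets rely on. The elements are char * pointers, so every callback
 * receives the address of a char * and must dereference once. The
 * djb2-style hash is an assumption; any decent string hash would do.
 */
#include <stdlib.h>
#include <string.h>

static int StringHash(const void *elemAddr, int numBuckets)
{
  const char *s = *(const char **)elemAddr;  // elemAddr is really a char **
  unsigned long hash = 5381;
  while (*s != '\0')
    hash = hash * 33 + *s++;                 // djb2: hash * 33 + c
  return (int)(hash % (unsigned long)numBuckets);
}

static int StringCompare(const void *elemAddr1, const void *elemAddr2)
{
  return strcmp(*(const char **)elemAddr1, *(const char **)elemAddr2);
}

static void StringFree(void *elemAddr)
{
  free(*(char **)elemAddr);                  // free the strdup'd string itself
}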
static void CreateDataStructure(rssFeedData *data)
{
  HashSetNew(&data->stopWords, sizeof(char *), kNumStopWordsBuckets,
             StringHash, StringCmp, StringFree);
  // store pointers to articles, not the articles themselves, so the hashset
  // can reallocate its internal storage without invalidating outstanding
  // article pointers
  HashSetNew(&data->articles, sizeof(articleData *), kNumStopWordsBuckets,
             ArticleHash, ArticleCmp, ArticleFree);
  HashSetNew(&data->indices, sizeof(indexData), kNumIndexBuckets,
             IndexHash, IndexCmp, IndexFree);
  memset(&data->item, 0, sizeof(rssFeedItem));
}
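/*
 * Because the articles set above stores articleData * pointers, each
 * callback receives the address of a pointer and must dereference twice.
 * A sketch under that assumption; the articleData fields and the choice of
 * hashing on the URL are hypothetical, and StringHash is the char * hash
 * sketched earlier.
 */
#include <stdlib.h>
#include <string.h>

typedef struct {
  char *title;
  char *url;
} articleData;

static int ArticleHash(const void *elemAddr, int numBuckets)
{
  const articleData *a = *(articleData *const *)elemAddr;  // element is a pointer
  return StringHash(&a->url, numBuckets);                  // reuse the char * hash
}

static int ArticleCmp(const void *elemAddr1, const void *elemAddr2)
{
  const articleData *a = *(articleData *const *)elemAddr1;
  const articleData *b = *(articleData *const *)elemAddr2;
  return strcmp(a->url, b->url);
}

static void ArticleFree(void *elemAddr)
{
  articleData *a = *(articleData **)elemAddr;
  free(a->title);
  free(a->url);
  free(a);     // the set owns the article the pointer refers to
}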
int main(int argc, const char *argv[])
{
  hashset thesaurus;
  HashSetNew(&thesaurus, sizeof(thesaurusEntry), kApproximateWordCount,
             StringHash, StringCompare, ThesEntryFree);
  const char *thesaurusFileName = (argc == 1)
      ? "/Users/shankuljain/Documents/Programming WorkSpace/CS107/assn 3/assn-3-vector-hashset-data/thesaurus.txt"
      : argv[1];
  ReadThesaurus(&thesaurus, thesaurusFileName);
  QueryThesaurus(&thesaurus);
  HashSetDispose(&thesaurus);
  return 0;
}
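/*
 * Note that main passes the plain string callbacks even though the elements
 * are thesaurusEntry structs. That only works if the char * word is the
 * first field, so the address of an entry is also the address of its string.
 * A sketch under that assumption; the field names are hypothetical.
 */
#include <stdlib.h>

typedef struct {
  char *word;          // MUST be first: StringHash/StringCompare treat the
                       // element address as a char ** and look no further
  vector synonyms;     // vector of char *, one per synonym
} thesaurusEntry;

static void ThesEntryFree(void *elemAddr)
{
  thesaurusEntry *entry = elemAddr;
  free(entry->word);
  VectorDispose(&entry->synonyms);  // the vector's free function releases each synonym
}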
static void initThreadsData(rssDatabase *db)
{
  VectorNew(&db->threads, sizeof(threadData), ThreadDataFree, 0);
  HashSetNew(&db->locks.limitConnToServerLock, sizeof(serverLockData), kNumOfServersBuckets,
             ConnectionsLockHash, ConnectionsLockCompare, ConnectionsLockFree);
  pthread_mutex_init(&db->locks.serverDataLock, NULL);
  pthread_mutex_init(&db->locks.articlesVectorLock, NULL);
  pthread_mutex_init(&db->locks.indicesHashSetLock, NULL);
  pthread_mutex_init(&db->locks.stopWordsHashSetLock, NULL);
  sem_init(&db->locks.connectionsLock, 0, kNumOfConnections);
}
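/*
 * A sketch of how the per-server connection set initialized above might be
 * used. serverLockData's layout, the helper name, and the kMaxConnsPerServer
 * constant are all assumptions; the pattern (find or create the server's
 * semaphore while holding serverDataLock) follows from the locks being set
 * up here.
 */
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <semaphore.h>

typedef struct {
  const char *serverName;  // key: the server's host name
  sem_t *limit;            // heap-allocated so copies of the element share it
} serverLockData;

static sem_t *getServerLock(rssDatabase *db, const char *serverName)
{
  serverLockData probe = { serverName, NULL };
  pthread_mutex_lock(&db->locks.serverDataLock);       // guard the shared set
  serverLockData *found = HashSetLookup(&db->locks.limitConnToServerLock, &probe);
  if (found == NULL) {                                 // first contact with this server
    probe.serverName = strdup(serverName);
    probe.limit = malloc(sizeof(sem_t));
    sem_init(probe.limit, 0, kMaxConnsPerServer);      // assumed per-server cap
    HashSetEnter(&db->locks.limitConnToServerLock, &probe);
    found = HashSetLookup(&db->locks.limitConnToServerLock, &probe);
  }
  pthread_mutex_unlock(&db->locks.serverDataLock);
  return found->limit;   // caller sem_waits/sem_posts around its connection
}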
static void loadStopWords(hashset *s)
{
  HashSetNew(s, sizeof(char *), 1009, StringHash, StringCmp, StringFree);

  FILE *infile = fopen(kStopListFile, "r");
  assert(infile != NULL);

  streamtokenizer st;
  char buffer[1024];
  STNew(&st, infile, kNewLineDelimiters, true);
  while (STNextToken(&st, buffer, sizeof(buffer))) {
    char *newWord = strdup(buffer);
    HashSetEnter(s, &newWord);
  }
  STDispose(&st);
  fclose(infile);
}
// There are roughly a thousand stop words, so we choose
// the first prime > 1000.
static void BuildStopWordsHashset(hashset *stopWords, const char *stopWordsFileName)
{
  FILE *infile = fopen(stopWordsFileName, "r");
  assert(infile != NULL);

  HashSetNew(stopWords, sizeof(char *), kApproximateWordCount,
             StringHash, StringCompare, StringFree);

  streamtokenizer st;
  char buffer[1024];
  STNew(&st, infile, kNewLineDelimiters, true);
  while (STNextToken(&st, buffer, sizeof(buffer))) {
    char *elem = strdup(buffer);
    HashSetEnter(stopWords, &elem);
  }
  STDispose(&st);
  fclose(infile);
}
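/*
 * The lookup side of the set built above: elements are char * pointers, so
 * the caller passes the address of its char *. The helper name is
 * hypothetical.
 */
#include <stdbool.h>

static bool WordIsStopWord(hashset *stopWords, const char *word)
{
  return HashSetLookup(stopWords, &word) != NULL;
}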
void calculateNext(hashset *hash, vector *keys, int k, char *f, int size)
{
  int currK, i, elems = 0, seedNumber;
  char *nxt, *cnt, *key2;
  vectorv keyNext;
  keysv *rs, key;

  int chunk = k + 1;                        // k characters per chunk, +1 for the NUL
  char *storage = malloc(chunk);            // sized to match what fgets may write
  FILE *fileR = fopen(f, "r");
  assert(fileR != NULL && "Cannot open the file");

  VectorNew(keys, sizeof(char) * chunk, NULL, 10);
  while (fgets(storage, chunk, fileR) != NULL) {
    currK = strlen(storage);
    if (currK < chunk && storage[currK - 1] == '\n') {
      fgets(&storage[currK], chunk - currK, fileR);
    }
    VectorAppend(keys, storage);            // the vector keeps its own copy
  }
  free(storage);
  fclose(fileR);

  HashSetNew(hash, sizeof(keysv), keys->currentPosition * 3, hashVector, cmpVector, NULL);
  for (i = 0; i < keys->currentPosition - 1; i++) {
    vector nexts;
    cnt = VectorNth(keys, i);
    nxt = VectorNth(keys, i + 1);

    keysv probe;                            // stack probe: lookup only inspects ->string
    probe.string = cnt;
    rs = (keysv *)HashSetLookup(hash, &probe);

    keyNext.string = nxt;
    key.string = cnt;
    if (rs == NULL) {                       // first sighting of this key
      keyNext.frecuency = 1;
      VectorNew(&nexts, sizeof(vectorv), NULL, 1);
      VectorAppend(&nexts, &keyNext);
      key.frecuency = 1;
      key.vectorv = nexts;
      key.amount = 1;
      HashSetEnter(hash, &key);
    } else {                                // key seen before: update in place
      rs->frecuency++;
      rs->amount++;
      vectorv *rSucessor;
      int idx = VectorSearch(&rs->vectorv, &keyNext, cmpvct, 0, false);
      if (idx >= 0) {
        rSucessor = VectorNth(&rs->vectorv, idx);
        rSucessor->frecuency++;
      } else {
        keyNext.frecuency = 1;
        VectorAppend(&rs->vectorv, &keyNext);
      }
    }
  }

  // the trailing chunk has no successor of its own; record it only if it has
  // not already been seen as a key, so we don't clobber its successor vector
  key.string = VectorNth(keys, keys->currentPosition - 1);
  if (HashSetLookup(hash, &key) == NULL) {
    key.frecuency = 1;
    key.amount = 0;
    VectorNew(&key.vectorv, sizeof(vectorv), NULL, 1);  // empty successor list
    HashSetEnter(hash, &key);
  }

  if (k == 0) {
    elems = keys->currentPosition;
  } else {
    HashSetMap(hash, mapFn, &elems);
  }

  seedNumber = rand() % elems;
  key2 = (char *)VectorNth(keys, seedNumber);
  printf("Generated text:\n");
  printf("%s", key2);
  if (k > 0) {                              // order-k: walk the Markov chain
    for (i = 0; i < size;) {
      key2 = ran(hash, keys, key2);
      printf("%s", key2);
      if (strstr(key2, " ") != NULL || strstr(key2, "\n") != NULL) i++;
    }
  } else {                                  // order 0: emit random chunks
    for (i = 0; i < size;) {
      seedNumber = rand() % elems;
      key2 = (char *)VectorNth(keys, seedNumber);
      printf("%s", key2);
      if (strstr(key2, " ") != NULL || strstr(key2, "\n") != NULL) i++;
    }
  }
  printf("\n");
}
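/*
 * The Markov builder above leans on two record types this snippet never
 * defines. A plausible reconstruction, inferred from the field accesses
 * (string, frecuency, amount, vectorv) and the callbacks it registers; none
 * of this is the original code. hashVector reuses a char * hash like the
 * StringHash sketched earlier.
 */
#include <string.h>

typedef struct {
  char *string;        // one successor chunk
  int frecuency;       // times this successor followed the key
} vectorv;

typedef struct {
  char *string;        // the k-character key chunk
  int frecuency;       // total occurrences of the key
  int amount;          // successor occurrences recorded so far
  vector vectorv;      // vector of vectorv successor records
} keysv;

static int hashVector(const void *elemAddr, int numBuckets)
{
  const keysv *key = elemAddr;
  return StringHash(&key->string, numBuckets);
}

static int cmpVector(const void *elemAddr1, const void *elemAddr2)
{
  return strcmp(((const keysv *)elemAddr1)->string,
                ((const keysv *)elemAddr2)->string);
}

static int cmpvct(const void *elemAddr1, const void *elemAddr2)
{
  return strcmp(((const vectorv *)elemAddr1)->string,
                ((const vectorv *)elemAddr2)->string);
}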
static void CreateDataStructure(rssFeedData *data)
{
  HashSetNew(&data->stopWords, sizeof(char *), kNumStopWordsBuckets,
             StringHash, StringCmp, StringFree);
  HashSetNew(&data->articles, sizeof(articleData *), kNumStopWordsBuckets,
             ArticleHash, ArticleCmp, ArticleFree);
  HashSetNew(&data->indices, sizeof(indexData), kNumStopWordsBuckets,
             IndexHash, IndexCmp, IndexFree);
}
void InitializeWordCounts(hashset *wordCount)
{
  HashSetNew(wordCount, sizeof(wordSet), 10007, wordHashFn, wordCmpFn, wordSetFreeFn);
}
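/*
 * A sketch of the update path a counts table like this typically pairs with:
 * look the word up, bump its count if present, otherwise enter a fresh
 * element. The wordSet fields and the helper name are assumptions.
 */
#include <stdlib.h>
#include <string.h>

typedef struct {
  char *word;          // first field, so string-style callbacks could also apply
  int count;
} wordSet;

static void UpdateWordCount(hashset *wordCount, const char *word)
{
  wordSet probe = { (char *)word, 1 };
  wordSet *found = HashSetLookup(wordCount, &probe);
  if (found != NULL) {
    found->count++;                 // already present: bump in place
  } else {
    probe.word = strdup(word);      // new word: the set takes ownership
    HashSetEnter(wordCount, &probe);
  }
}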