/*
 * Looks up `ch` in `v` twice with VectorSearch -- once with the final
 * (sorted) flag set to true and once with it set to false -- and prints
 * whether each lookup succeeded.  A result of -1 is reported as "not found".
 */
static void TestSearch(vector *v, char ch)
{
  int sortedHit = VectorSearch(v, &ch, CompareChar, 0, true);     /* search as sorted data */
  int unsortedHit = VectorSearch(v, &ch, CompareChar, 0, false);  /* search as unsorted data */

  fprintf(stdout, "\nFound '%c' in sorted array? %s. How about unsorted? %s.",
          ch,
          YES_OR_NO((sortedHit != -1)),
          YES_OR_NO((unsortedHit != -1)));
}
/*
 * Searches `v` for `ch` in both VectorSearch modes and reports the outcome.
 *
 * v:  vector of chars to search.
 * ch: character to look for.
 *
 * The first call passes true as the last argument (sorted search), the
 * second passes false (unsorted search).  Either call returning -1 is
 * printed as a miss.
 */
static void TestSearch(vector *v, char ch)
{
  int foundSorted;
  int foundNot;

  foundSorted = VectorSearch(v, &ch, CompareChar, 0, true);
  foundNot = VectorSearch(v, &ch, CompareChar, 0, false);

  fprintf(stdout,
          "\nFound '%c' in sorted array? %s. How about unsorted? %s.",
          ch,
          YES_OR_NO((foundSorted != -1)),
          YES_OR_NO((foundNot != -1)));
}
/*
 * Fetches the article at `articleURL` and, on an HTTP 200 response, records
 * it in db->previouslySeenArticles and feeds its text to ScanArticle.
 *
 * db:           shared database; its articles vector is only touched while
 *               db->locks.articlesVectorLock is held.
 * articleTitle: title used for messages and as part of the lookup key.
 * articleURL:   absolute URL of the article.
 *
 * Response handling:
 *   0        - connection failed, a message is printed.
 *   200      - article is registered and scanned.
 *   301/302  - every resource is released and the function recurses on the
 *              redirect target.
 *   other    - a "Punting" message is printed.
 *
 * The "seen before" test is done twice: once up front to avoid a pointless
 * download, and once more right before the append.  The lock is released
 * between the two, so without the second test two threads handling the same
 * article could both pass the first test and both append it.
 */
static void ParseArticle(rssDatabase *db, const char *articleTitle, const char *articleURL)
{
  url u;
  urlconnection urlconn;
  streamtokenizer st;
  int articleID;

  URLNewAbsolute(&u, articleURL);
  rssNewsArticle newsArticle = { articleTitle, u.serverName, u.fullName };
  pthread_mutex_t *articlesLock = &(db->locks.articlesVectorLock);

  /* Early out: skip the network round trip for an article already recorded. */
  pthread_mutex_lock(articlesLock);
  if (VectorSearch(&db->previouslySeenArticles, &newsArticle, NewsArticleCompare, 0, false) >= 0) {
    pthread_mutex_unlock(articlesLock);
    printf("[Ignoring \"%s\": we've seen it before.]\n", articleTitle);
    URLDispose(&u);
    return;
  }
  pthread_mutex_unlock(articlesLock);

  /* NOTE(review): lockConnection/unlockConnection presumably limit
   * concurrent connections per server -- confirm against their definitions. */
  lockConnection(db, u.serverName);
  URLConnectionNew(&urlconn, &u);

  switch (urlconn.responseCode) {
    case 0:
      printf("Unable to connect to \"%s\". Domain name or IP address is nonexistent.\n", articleURL);
      break;

    case 200:
      pthread_mutex_lock(articlesLock);
      /* Re-check under the same lock that guards the append: another thread
       * may have registered this article while we were connecting. */
      if (VectorSearch(&db->previouslySeenArticles, &newsArticle, NewsArticleCompare, 0, false) >= 0) {
        pthread_mutex_unlock(articlesLock);
        printf("[Ignoring \"%s\": we've seen it before.]\n", articleTitle);
        break;
      }
      printf("[%s] Indexing \"%s\"\n", u.serverName, articleTitle);
      /* The key built above borrows our strings; clone before storing. */
      NewsArticleClone(&newsArticle, articleTitle, u.serverName, u.fullName);
      VectorAppend(&db->previouslySeenArticles, &newsArticle);
      articleID = VectorLength(&db->previouslySeenArticles) - 1;
      pthread_mutex_unlock(articlesLock);

      STNew(&st, urlconn.dataStream, kTextDelimiters, false);
      ScanArticle(&st, articleID, &db->indices, &db->stopWords,
                  &(db->locks.indicesHashSetLock), &(db->locks.stopWordsHashSetLock));
      STDispose(&st);
      break;

    case 301:
    case 302: {
      /* Just pretend we had the redirected URL all along, though index using
       * the new URL and not the old one.  The target is copied to the stack
       * because urlconn (which owns newUrl) is disposed before recursing. */
      int newURLLength = strlen(urlconn.newUrl) + 1;
      char newURLBuffer[newURLLength];
      strcpy(newURLBuffer, urlconn.newUrl);
      URLConnectionDispose(&urlconn);
      unlockConnection(db, u.serverName);
      URLDispose(&u);
      ParseArticle(db, articleTitle, newURLBuffer);
      return;
    }

    default:
      printf("Unable to pull \"%s\" from \"%s\". [Response code: %d] Punting...\n",
             articleTitle, u.serverName, urlconn.responseCode);
      break;
  }

  URLConnectionDispose(&urlconn);
  unlockConnection(db, u.serverName);
  URLDispose(&u);
}
/*
 * Records one more occurrence of article `a` in `articles`.
 *
 * If ArticleCompare finds a matching element, that stored element's
 * numOccurrences is incremented.  Otherwise a->numOccurrences is set to 1
 * (note: this writes to the caller's struct) and `a` is appended.
 */
static void UpdateOccurences(vector *articles, article *a)
{
  const int pos = VectorSearch(articles, a, ArticleCompare, 0, false);

  if (pos != -1) {
    article *stored = (article *)VectorNth(articles, pos);
    stored->numOccurrences++;
    return;
  }

  a->numOccurrences = 1;
  VectorAppend(articles, a);
}
/*
 * Inserts the element at `elemAddr` into the hashset.
 *
 * The element's bucket is chosen by h->hashfn.  If the bucket already holds
 * an element that h->comparefn matches, it is replaced in place; otherwise
 * the new element is appended to the bucket.
 *
 * Asserts that elemAddr is non-NULL and that the hash code lies in
 * [0, h->numBuckets).
 */
void HashSetEnter(hashset *h, const void *elemAddr)
{
  assert(elemAddr != NULL && "elemAddr can't be NULL");

  int hashCode = h->hashfn(elemAddr, h->numBuckets);
  assert(hashCode >= 0 && hashCode < h->numBuckets && "not possible to insert the specified element into the specified hashset");

  vector *bucket = h->buckets + hashCode;
  int slot = VectorSearch(bucket, elemAddr, h->comparefn, 0, false);

  if (slot != -1) {
    VectorReplace(bucket, elemAddr, slot);
    return;
  }
  VectorAppend(bucket, elemAddr);
}
/*
 * Looks up the element matching `elemAddr` in the hashset.
 *
 * h:        hashset to search.
 * elemAddr: address of a key element; must not be NULL.
 *
 * Returns the address of the stored element that h->comparefn matches, or
 * NULL when the element's bucket contains no match.
 *
 * Asserts that elemAddr is non-NULL and that the hash code produced by
 * h->hashfn lies in [0, h->numBuckets).
 */
void *HashSetLookup(const hashset *h, const void *elemAddr)
{
  assert(elemAddr != NULL && "elemAddr can't be NULL");

  int hashCode = h->hashfn(elemAddr, h->numBuckets);
  assert(hashCode >= 0 && hashCode < h->numBuckets &&
         "not possible to look up the specified element in the specified hashset");

  /* Only the bucket selected by the hash code can contain the element. */
  vector *bucket = h->buckets + hashCode;
  int position = VectorSearch(bucket, elemAddr, h->comparefn, 0, false);
  if (position == -1) {
    return NULL;
  }
  return VectorNth(bucket, position);
}
/*
 * Replaces every 's' in `alphabet` with '*' and then prints the vector.
 *
 * Each search resumes at the index of the previous hit; since that slot now
 * holds '*', the search moves on to the next match.  The loop ends when the
 * cursor reaches the vector's length or VectorSearch returns -1.
 */
static void TestReplace(vector *alphabet)
{
  char toFind = 's', toReplace = '*';
  int cursor = 0;

  for (;;) {
    if (cursor >= VectorLength(alphabet)) {
      break;
    }
    cursor = VectorSearch(alphabet, &toFind, CompareChar, cursor, false);
    if (cursor == -1) {
      break;
    }
    VectorReplace(alphabet, &toReplace, cursor);
  }

  fprintf(stdout, "\nAfter changing all %c to %c: ", toFind, toReplace);
  VectorMap(alphabet, PrintChar, stdout);
}
/*
 * Counts one more occurrence for `article` in the per-word vector `data`.
 *
 * A probe record carrying only the article pointer is searched for with
 * FindArticleRecordCmpFn.  On a hit the stored record's counter is
 * incremented; on a miss the probe is appended with counter set to 1.
 */
static void indexWord(vector *data, articleData *article)
{
  wordCounter probe;
  probe.article = article;

  int where = VectorSearch(data, &probe, FindArticleRecordCmpFn, 0, false);
  if (where != -1) {
    wordCounter *hit = VectorNth(data, where);
    hit->counter++;
    return;
  }

  probe.counter = 1;
  VectorAppend(data, &probe);
}
/*
 * Registers one occurrence of the current word for `article`.
 *
 * counters: vector of wordCounter records for a single word.
 * article:  article in which the word was just seen.
 *
 * If FindArticleRecordCmpFn matches an existing record, its count is bumped;
 * otherwise a new record with count 1 is appended.
 */
static void indexWord(vector *counters, articleData *article)
{
  wordCounter candidate;
  candidate.articleItem = article;

  const int slot = VectorSearch(counters, &candidate, FindArticleRecordCmpFn, 0, false);

  if (slot == -1) {
    /* First sighting of this word in this article. */
    candidate.count = 1;
    VectorAppend(counters, &candidate);
    return;
  }

  ((wordCounter *)VectorNth(counters, slot))->count++;
}
/*
 * Records one occurrence of a word in the article identified by
 * `articleIndex`.
 *
 * articlesForWord: vector of wordcountEntry records for a single word.
 * articleIndex:    index of the article in which the word was just seen.
 *
 * If WordcountEntryCmp matches an existing entry, its wordcount is
 * incremented.  Otherwise a new entry is appended with a wordcount of 1, so
 * that the stored count equals the number of calls made for this article
 * (appending with 0 would leave every count one short).
 */
static void UpdateIndices(vector *articlesForWord, int articleIndex)
{
  /* Probe used only as a search key; wordcount is 0 while searching, exactly
   * as before, in case the comparator looks at it. */
  wordcountEntry newWordcount;
  newWordcount.articleIndex = articleIndex;
  newWordcount.wordcount = 0;

  int idx = VectorSearch(articlesForWord, &newWordcount, WordcountEntryCmp, 0, false);

  if (idx == -1) {
    /* First occurrence in this article counts as one. */
    newWordcount.wordcount = 1;
    VectorAppend(articlesForWord, &newWordcount);
  } else {
    wordcountEntry *found = (wordcountEntry *)VectorNth(articlesForWord, idx);
    found->wordcount++;
  }
}
/*
 * Downloads the article at `articleURL` and, on an HTTP 200 response, adds
 * it to allData->explored and scans its text.
 *
 * articleURL:   absolute URL of the article.
 * articleTitle: title used in messages and in the lookup key.
 * allData:      aggregate holding the vector of explored articles.
 *
 * An article that ArticleCmp already finds in allData->explored is skipped.
 * A 301/302 response is followed by recursing on the redirect target; the
 * current connection stays open until the recursive call returns.
 */
static void ParseArticle(const char *articleURL, const char *articleTitle, rssData *allData)
{
  url u;
  urlconnection urlconn;
  streamtokenizer st;
  int articleIndex;

  URLNewAbsolute(&u, articleURL);

  /* Skip the download entirely when this article was scanned before. */
  article a = {articleURL, articleTitle, u.serverName};
  if (VectorSearch(&allData->explored, &a, ArticleCmp, 0, false) >= 0) {
    printf("[Pass. article already indexed: \"%s\"]\n", articleTitle);
    URLDispose(&u);
    return;
  }

  URLConnectionNew(&urlconn, &u);

  if (urlconn.responseCode == 0) {
    printf("Unable to connect to \"%s\". Domain name or IP address is nonexistent.\n", articleURL);
  } else if (urlconn.responseCode == 200) {
    printf("Scanning \"%s\" from \"http://%s\"\n", articleTitle, u.serverName);
    STNew(&st, urlconn.dataStream, kTextDelimiters, false);
    PersistArticle(&a, articleURL, articleTitle, u.serverName);
    VectorAppend(&allData->explored, &a);
    articleIndex = VectorLength(&allData->explored) - 1;
    ScanArticle(&st, &a, articleIndex, allData);
    STDispose(&st);
  } else if (urlconn.responseCode == 301 || urlconn.responseCode == 302) {
    /* Just pretend we had the redirected URL all along, though index using
     * the new URL and not the old one. */
    ParseArticle(urlconn.newUrl, articleTitle, allData );
  } else {
    printf("Unable to pull \"%s\" from \"%s\". [Response code: %d] Punting...\n",
           articleTitle, u.serverName, urlconn.responseCode);
  }

  URLConnectionDispose(&urlconn);
  URLDispose(&u);
}
/*
 * Records one occurrence of `word` in the article (articleTitle, articleURL).
 *
 * Three situations are handled:
 *   1. the word is not in the hashset yet: an entry with an empty occurrence
 *      vector is created first;
 *   2. the word exists but has no record for this article: a record with a
 *      count of 1 (owning strdup'ed copies of title and URL) is appended;
 *   3. the word/article record exists: its count is incremented.
 *
 * The lookup key is the address of the `word` pointer itself.
 * NOTE(review): this presumably relies on wordSet starting with its `word`
 * member -- confirm against the struct definition.
 */
void WordCountEnter(hashset *wordCount, const char *word, const char *articleTitle, const char *articleURL)
{
  wordSet *existingWord = (wordSet *) HashSetLookup(wordCount, &word);

  /* Case 1: create the entry for a word seen for the first time. */
  if (existingWord == NULL) {
    wordSet fresh;
    fresh.word = strdup(word);
    VectorNew(&fresh.occ, sizeof(articleCount), articleCountFreeFn, 25);
    HashSetEnter(wordCount, &fresh);
  }

  /* From here on the entry must exist; fetch the stored copy. */
  existingWord = (wordSet *) HashSetLookup(wordCount, &word);
  assert(existingWord != NULL);

  vector *occurrences = &existingWord->occ;

  /* Search key borrows the caller's strings; nothing is copied yet. */
  articleCount articleKey = { { (char *) articleTitle, (char *) articleURL }, 1 };
  int where = VectorSearch(occurrences, &articleKey, articleCountCompareFn, 0, false);

  if (where != -1) {
    /* Case 3: known word/article pairing. */
    articleCount *hit = (articleCount *) VectorNth(occurrences, where);
    hit->count++;
    return;
  }

  /* Case 2: new pairing; the stored record gets its own string copies. */
  articleCount added;
  (added.source).title = strdup(articleTitle);
  (added.source).url = strdup(articleURL);
  added.count = 1;
  VectorAppend(occurrences, &added);
}
/*
 * Bumps the frequency of `word` for the article at `articleIndex`.
 *
 * indices:      hashset of rssIndexEntry records.
 * word:         the word just read from the article.
 * articleIndex: index identifying the article.
 *
 * When the word has no entry yet, one is created with a strdup'ed copy of
 * the word and an empty relevantArticles vector.  When the entry has no
 * record for this article, a record with frequency 0 is appended.  In every
 * case the record's freq is then incremented by one.
 */
static void AddWordToIndices(hashset *indices, const char *word, int articleIndex)
{
  rssIndexEntry indexEntry = { word }; /* partial initialization: key only */
  rssIndexEntry *existingIndexEntry = HashSetLookup(indices, &indexEntry);

  if (existingIndexEntry == NULL) {
    indexEntry.meaningfulWord = strdup(word);
    VectorNew(&indexEntry.relevantArticles, sizeof(rssRelevantArticleEntry), NULL, 0);
    HashSetEnter(indices, &indexEntry);
    /* Continue with the stored copy, as if it had been there all along. */
    existingIndexEntry = HashSetLookup(indices, &indexEntry);
    assert(existingIndexEntry != NULL);
  }

  vector *articles = &existingIndexEntry->relevantArticles;
  rssRelevantArticleEntry articleEntry = { articleIndex, 0 };

  int slot = VectorSearch(articles, &articleEntry, ArticleIndexCompare, 0, false);
  if (slot == -1) {
    VectorAppend(articles, &articleEntry);
    slot = VectorLength(articles) - 1;
  }

  rssRelevantArticleEntry *record = VectorNth(articles, slot);
  record->freq++;
}
/*
 * Returns the address of the stored element matching `elemAddr`, or NULL
 * when the vector returned by HashSetElemVector for that element holds no
 * match according to h->comparefn.
 */
void *HashSetLookup(const hashset *h, const void *elemAddr)
{
  vector *bucket = HashSetElemVector(h, elemAddr);
  int where = VectorSearch(bucket, elemAddr, h->comparefn, 0, false);

  if (where == -1) {
    return NULL;
  }
  return VectorNth(bucket, where);
}
/*
 * Reads the text file `f` in fixed-size chunks, builds a table mapping each
 * chunk to the chunks that follow it, and prints generated text.
 *
 * hash: hashset initialised here (HashSetNew); ends up holding one keysv per
 *       distinct chunk.
 * keys: vector initialised here (VectorNew); holds every chunk in file order.
 *       Each element is sizeof(char*) * (k + 1) bytes and contains a
 *       NUL-terminated string.
 * k:    maximum number of characters per chunk.
 * f:    path of the input file.
 * size: number of printed chunks containing a space or newline to emit
 *       after the seed chunk.
 *
 * Asserts that the file can be opened, that the read buffer can be
 * allocated and that the file yields at least one chunk.
 */
void calculateNext(hashset *hash, vector* keys, int k, char* f, int size)
{
  int currK, i, elems = 0, seedNumber;
  char *nxt, *cnt, *key2, *storage;
  FILE *fileR;
  vectorv keyNext;
  keysv *rs, key;

  k += 1; /* fgets counts the terminating NUL in its size argument */

  fileR = fopen(f, "r");
  assert(fileR != NULL && "Cannot open the file");

  VectorNew(keys, sizeof(char*) * k, NULL, 10);

  /* The buffer is exactly one vector element wide: VectorAppend copies
   * sizeof(char*) * k bytes from it, and fgets writes at most k bytes into
   * it.  calloc keeps the bytes past the string initialised. */
  storage = calloc((size_t)k, sizeof(char*));
  assert(storage != NULL && "Out of memory");

  while (fgets(storage, k, fileR) != NULL) {
    currK = strlen(storage);
    /* A chunk cut short by a newline is topped up from the next line. */
    if (currK > 0 && currK < k && storage[currK - 1] == '\n') {
      fgets(&storage[currK], k - currK, fileR);
    }
    VectorAppend(keys, storage);
  }
  fclose(fileR);
  free(storage); /* every chunk has been copied into `keys` */

  assert(keys->currentPosition > 0 && "The file contains no text");

  HashSetNew(hash, sizeof(keysv), keys->currentPosition * 3, hashVector, cmpVector, NULL);

  for (i = 0; i < (keys->currentPosition - 1); i++) {
    vector nexts;

    cnt = VectorNth(keys, i);
    nxt = VectorNth(keys, i + 1);
    keyNext.string = nxt;
    key.string = cnt;

    /* `key` doubles as the lookup probe, so nothing is allocated per chunk. */
    rs = (keysv*)HashSetLookup(hash, &key);
    if (rs == NULL) {
      /* First time this chunk is seen: start its successor list. */
      keyNext.frecuency = 1;
      VectorNew(&nexts, sizeof(vectorv), NULL, 1);
      VectorAppend(&nexts, &keyNext);
      key.frecuency = 1;
      key.vectorv = nexts;
      key.amount = 1;
      HashSetEnter(hash, &key);
    } else {
      rs->frecuency++;
      rs->amount++;
      vectorv* rSucessor;
      int idx = VectorSearch(&rs->vectorv, &keyNext, cmpvct, 0, false);
      if (idx >= 0) {
        rSucessor = VectorNth(&rs->vectorv, idx);
        rSucessor->frecuency++;
      } else {
        keyNext.frecuency = 1;
        VectorAppend(&rs->vectorv, &keyNext);
      }
    }
  }

  /* NOTE(review): the last chunk is entered unconditionally.  If an equal
   * chunk is already stored, HashSetEnter presumably replaces it, and
   * key.vectorv still holds whatever an earlier iteration left there --
   * confirm this is intended. */
  key.string = VectorNth(keys, keys->currentPosition - 1);
  key.frecuency = 1;
  key.amount = 0;
  HashSetEnter(hash, &key);

  /* NOTE(review): k was incremented above, so k == 0 only holds when the
   * caller passed -1 -- confirm which order this branch is meant for. */
  if (k == 0) {
    elems = keys->currentPosition;
  } else {
    /* mapFn presumably accumulates a total into elems -- confirm. */
    HashSetMap(hash, mapFn, &elems);
  }
  if (elems <= 0) {
    elems = 1; /* rand() % 0 is undefined; fall back to the first chunk */
  }

  seedNumber = rand() % elems;
  key2 = (char*)VectorNth(keys, seedNumber);
  printf("Generated text:\n");
  printf("%s", key2);

  if (k > 0) {
    for (i = 0; i < size;) {
      key2 = ran(hash, keys, key2);
      printf("%s", key2);
      /* Only chunks containing a space or newline count towards `size`. */
      if (strstr(key2, " ") != NULL || strstr(key2, "\n") != NULL) {
        i++;
      }
    }
  } else {
    for (i = 0; i < size;) {
      seedNumber = rand() % elems;
      key2 = (char*)VectorNth(keys, seedNumber);
      printf("%s", key2);
      if (strstr(key2, " ") != NULL || strstr(key2, "\n") != NULL) {
        i++;
      }
    }
  }
  printf("\n");
}