/*
 * Reads thesaurus entries from a token stream and enters them into a hashset.
 *
 * An entry is a headword token followed by zero or more synonyms, each one
 * introduced by a token whose first character is ','.  The first token after
 * an entry that does not start with ',' ends the entry and is discarded.
 * Every entry (a strdup'ed headword plus a vector of strdup'ed synonyms) is
 * handed to HashSetEnter.  A progress dot is printed whenever the set's
 * element count is a multiple of 1000.
 *
 * thesaurus - set that receives the thesaurusEntry records.
 * st        - tokenizer supplying the thesaurus tokens.
 */
static void TokenizeAndBuildThesaurus(hashset *thesaurus, streamtokenizer *st)
{
    char buffer[2048];

    printf("Loading thesaurus. Be patient! ");
    fflush(stdout);

    while (STNextToken(st, buffer, sizeof(buffer))) {
        thesaurusEntry entry;

        entry.word = strdup(buffer);
        VectorNew(&entry.synonyms, sizeof(char *), StringFree, 4);

        while (STNextToken(st, buffer, sizeof(buffer)) && (buffer[0] == ',')) {
            char *synonym;

            /* A separator with no token after it (truncated input) must not
             * yield a synonym: no fresh token was delivered into buffer. */
            if (!STNextToken(st, buffer, sizeof(buffer)))
                break;

            synonym = strdup(buffer);
            VectorAppend(&entry.synonyms, &synonym);
        }

        HashSetEnter(thesaurus, &entry);
        if (HashSetCount(thesaurus) % 1000 == 0) {
            printf(".");
            fflush(stdout);
        }
    }

    printf(" [All done!]\n");
    fflush(stdout);
}
/* helper to create a skiplist-node, walk the given the ref-region fo find and enter all skip positions */ static struct skiplist_ref_node * make_skiplist_ref_node( const struct reference_region * r ) { struct skiplist_ref_node * res = calloc( 1, sizeof *res ); if ( res != NULL ) { uint32_t i, n = VectorLength( &r->ranges ); res->name = string_dup_measure ( r->name, NULL ); VectorInit ( &res->skip_ranges, 0, 5 ); /* walk the ranges-Vector of the reference-region */ for ( i = 0; i < n; ++i ) { const struct reference_range * rr = VectorGet ( &( r->ranges ), i ); /* walk the skip-Vector of the reference-range */ uint32_t j, n1 = VectorLength( &rr->skip ); for ( j = 0; j < n1; ++j ) { const struct skip_range * sr = VectorGet ( &( rr->skip ), j ); if ( sr != NULL ) { struct skip_range * csr = make_skip_range( sr->start, sr->end ); if ( csr != NULL ) VectorAppend ( &( res->skip_ranges ), NULL, csr ); } } } res->current_id = 0; res->current_skip_range = VectorGet ( &( res->skip_ranges ), 0 ); } return res; }
/* AddSearchPath
 *  add a search path to loader for locating library files
 *
 *  "path" and "args" are forwarded unchanged to KDirectoryVOpenDirRead,
 *  which opens the directory relative to the native directory.  On success
 *  the opened directory is appended to self->search; if the append fails
 *  the directory is released again.
 *
 *  returns 0 on success, an rcSelf/rcNull error for a NULL "self", or the
 *  first failing call's return code.
 */
LIB_EXPORT rc_t CC KDyldVAddSearchPath ( KDyld *self, const char *path, va_list args )
{
    rc_t rc;
    KDirectory *native;
    const KDirectory *search_dir;

    if ( self == NULL )
        return RC ( rcFS, rcDylib, rcUpdating, rcSelf, rcNull );

    rc = KDirectoryNativeDir ( & native );
    if ( rc != 0 )
        return rc;

    rc = KDirectoryVOpenDirRead ( native, & search_dir, false, path, args );
    if ( rc == 0 )
    {
        rc = VectorAppend ( & self -> search, NULL, search_dir );
        if ( rc != 0 )
            KDirectoryRelease ( search_dir );
    }

    /* the native directory was only needed to resolve the path */
    KDirectoryRelease ( native );
    return rc;
}
/* VNamelistAppendString
 *  append a private copy of a String to the namelist
 *
 *  "src" must be non-NULL, with a non-NULL "addr" and a non-zero "len".
 *  The copy is made with string_dup and appended to self->name_vector;
 *  it is freed again if the append fails.
 *
 *  returns 0 on success, otherwise an rc describing the rejected argument,
 *  the failed allocation, or the failed append.
 */
LIB_EXPORT rc_t CC VNamelistAppendString( VNamelist *self, const String * src )
{
    rc_t rc;
    char * duplicate;

    if ( self == NULL )
        return RC ( rcCont, rcNamelist, rcInserting, rcSelf, rcNull );

    if ( src == NULL )
        return RC( rcCont, rcNamelist, rcInserting, rcString, rcNull );

    if ( src->addr == NULL || src->len == 0 )
        return RC( rcCont, rcNamelist, rcInserting, rcString, rcEmpty );

    duplicate = string_dup ( src->addr, src->len );
    if ( duplicate == NULL )
        return RC( rcCont, rcNamelist, rcInserting, rcMemory, rcExhausted );

    rc = VectorAppend( &(self->name_vector), NULL, duplicate );
    if ( rc != 0 )
        free ( duplicate );

    return rc;
}
/*
 * Records one occurrence of a word for an article.
 *
 * A strdup'ed copy of the word is looked up in stopWords; stop words are
 * dropped.  Otherwise the word is looked up in wordHash:
 *   - unknown word: a new currWord holding the copy and a one-element article
 *     vector (the article with numOccurrences set to 1) is entered;
 *   - known word: UpdateOccurences is applied to its article vector, and the
 *     temporary copy and vector are released.
 *
 * articlesSeen is not used by this function.
 */
static void ProcessWellFormedWord(char *word, article *a, hashset *stopWords, hashset *wordHash, hashset *articlesSeen)
{
    currWord candidate;
    currWord *known;
    char *copy = strdup(word);

    if (HashSetLookup(stopWords, &copy) != NULL) {
        free(copy);                       /* stop word: nothing to index */
        return;
    }

    candidate.thisWord = copy;
    VectorNew(&candidate.articles, sizeof(article), NULL, 100);

    known = (currWord *)HashSetLookup(wordHash, &candidate);
    if (known != NULL) {
        /* word already indexed: update it, then drop the scratch record */
        UpdateOccurences(&known->articles, a);
        free(copy);
        VectorDispose(&candidate.articles);
        return;
    }

    /* first sighting of this word */
    a->numOccurrences = 1;
    VectorAppend(&candidate.articles, a);
    HashSetEnter(wordHash, &candidate);
}
/*
 * Per-item callback: starts one merge thread for "item".
 *
 * item - the file set to merge; a NULL item starts no thread.
 * data - the on_merge_ctx shared by all invocations; its idx is incremented
 *        on every call, whether or not a thread was started.
 *
 * A merge_data record is allocated, filled from the context and handed to
 * merge_thread_func through KThreadMake.  The new thread is appended to
 * omc->threads.  Failures are reported via ErrMsg; the callback itself has
 * no return value.
 */
static void CC on_merge( void *item, void *data )
{
    on_merge_ctx * omc = data;

    if ( item != NULL )
    {
        merge_data * md = calloc( 1, sizeof * md );
        if ( md != NULL )
        {
            rc_t rc;
            KThread * thread;

            md -> cmn = omc -> cmn;
            md -> files = item;
            md -> idx = omc -> idx;

            rc = KThreadMake( &thread, merge_thread_func, md );
            if ( rc != 0 )
            {
                ErrMsg( "KThreadMake( on_merge #%d ) -> %R", omc -> idx, rc );
                /* no thread was created, so nobody else will ever see md */
                free( md );
            }
            else
            {
                rc = VectorAppend( &omc -> threads, NULL, thread );
                if ( rc != 0 )
                    ErrMsg( "VectorAppend( merge-thread #%d ) -> %R", omc -> idx, rc );
            }
        }
    }

    omc -> idx ++;
}
/*
 * Downloads one article and, if it has not been indexed yet, indexes it.
 *
 * db           - shared database; previouslySeenArticles is guarded by
 *                db->locks.articlesVectorLock.
 * articleTitle - title used for messages and for the stored article record.
 * articleURL   - absolute URL of the article.
 *
 * Behavior by response code:
 *   0        - connection failure is reported.
 *   200      - the article is cloned into previouslySeenArticles and its text
 *              is passed to ScanArticle under the article's vector index.
 *   301/302  - the function recurses on the redirect target after releasing
 *              everything it holds for the original URL.
 *   other    - the failure is reported.
 *
 * The connection is opened between lockConnection and unlockConnection for
 * the server name (presumably a per-server connection limit; confirm
 * against those helpers).
 */
static void ParseArticle(rssDatabase *db, const char *articleTitle, const char *articleURL)
{
    url u;
    urlconnection urlconn;
    streamtokenizer st;
    int articleID;

    URLNewAbsolute(&u, articleURL);
    rssNewsArticle newsArticle = { articleTitle, u.serverName, u.fullName };
    pthread_mutex_t *articlesLock = &(db->locks.articlesVectorLock);

    /* Cheap early exit before any network traffic. */
    pthread_mutex_lock(articlesLock);
    if (VectorSearch(&db->previouslySeenArticles, &newsArticle, NewsArticleCompare, 0, false) >= 0) {
        pthread_mutex_unlock(articlesLock);
        printf("[Ignoring \"%s\": we've seen it before.]\n", articleTitle);
        URLDispose(&u);
        return;
    }
    pthread_mutex_unlock(articlesLock);

    lockConnection(db, u.serverName);
    URLConnectionNew(&urlconn, &u);

    switch (urlconn.responseCode) {
    case 0:
        printf("Unable to connect to \"%s\".  Domain name or IP address is nonexistent.\n", articleURL);
        break;

    case 200:
        pthread_mutex_lock(articlesLock);
        /* The lock was released while connecting, so another thread may have
         * recorded the same article in the meantime.  Search again while
         * holding the lock so search and append form one atomic step. */
        if (VectorSearch(&db->previouslySeenArticles, &newsArticle, NewsArticleCompare, 0, false) >= 0) {
            pthread_mutex_unlock(articlesLock);
            printf("[Ignoring \"%s\": we've seen it before.]\n", articleTitle);
            break;
        }
        printf("[%s] Indexing \"%s\"\n", u.serverName, articleTitle);
        NewsArticleClone(&newsArticle, articleTitle, u.serverName, u.fullName);
        VectorAppend(&db->previouslySeenArticles, &newsArticle);
        articleID = VectorLength(&db->previouslySeenArticles) - 1;
        pthread_mutex_unlock(articlesLock);

        STNew(&st, urlconn.dataStream, kTextDelimiters, false);
        ScanArticle(&st, articleID, &db->indices, &db->stopWords,
                    &(db->locks.indicesHashSetLock), &(db->locks.stopWordsHashSetLock));
        STDispose(&st);
        break;

    case 301:
    case 302: {
        /* just pretend we have the redirected URL all along, though index
         * using the new URL and not the old one...  The target is copied
         * first because disposing the connection invalidates newUrl. */
        int newURLLength = strlen(urlconn.newUrl) + 1;
        char newURLBuffer[newURLLength];

        strcpy(newURLBuffer, urlconn.newUrl);
        URLConnectionDispose(&urlconn);
        unlockConnection(db, u.serverName);
        URLDispose(&u);
        ParseArticle(db, articleTitle, newURLBuffer);
        return;
    }

    default:
        printf("Unable to pull \"%s\" from \"%s\". [Response code: %d] Punting...\n",
               articleTitle, u.serverName, urlconn.responseCode);
        break;
    }

    URLConnectionDispose(&urlconn);
    unlockConnection(db, u.serverName);
    URLDispose(&u);
}
/*
 * Fills col->pairs with one pair for every combination of a non-NULL source
 * type and a non-NULL destination type of the column.
 *
 * Pairs are created with matcher_init_pair.  Processing stops at the first
 * failure: a NULL pair yields an rcMemory/rcExhausted code, and a failed
 * VectorAppend yields that call's return code.
 *
 * Returns 0 when every combination was added.
 */
static rc_t matcher_make_column_matrix( p_mcol col )
{
    rc_t rc = 0;
    uint32_t s;
    uint32_t src_count = VectorLength( &(col->src_types) );

    for ( s = 0; rc == 0 && s < src_count; ++s )
    {
        uint32_t d, dst_count;
        p_mtype from = (p_mtype) VectorGet ( &(col->src_types), s );

        if ( !from )
            continue;

        dst_count = VectorLength( &(col->dst_types) );
        for ( d = 0; rc == 0 && d < dst_count; ++d )
        {
            p_mpair combo;
            p_mtype to = (p_mtype) VectorGet ( &(col->dst_types), d );

            if ( !to )
                continue;

            combo = matcher_init_pair( from, to );
            if ( combo == NULL )
                rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
            else
                rc = VectorAppend( &(col->pairs), NULL, combo );
        }
    }

    return rc;
}
/* Open * open cursor, resolving schema * for the set of opened columns * * NB - there is no corresponding "Close" * use "Release" instead. */ static rc_t VProdResolveAddShallowTriggers ( const VProdResolve *self, const STable *stbl ) { rc_t rc; VCursor *curs; uint32_t i = VectorStart ( & stbl -> prod ); uint32_t end = i + VectorLength ( & stbl -> prod ); for ( rc = 0, curs = self -> curs; ( rc == 0 || self -> ignore_column_errors ) && i < end; ++ i ) { SProduction *sprod = VectorGet ( & stbl -> prod, i ); if ( sprod != NULL && sprod -> trigger ) { VProduction *prod = NULL; rc = VProdResolveSProduction ( self, & prod, sprod ); if ( rc == 0 && prod != NULL ) rc = VectorAppend ( & curs -> trig, NULL, prod ); } } if ( self -> ignore_column_errors ) return 0; return rc; }
rc_t run_sorter_pool( const sorter_params * params ) { rc_t rc = 0; uint64_t row_count = find_out_row_count( params ); if ( row_count == 0 ) { rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid ); ErrMsg( "multi_threaded_make_lookup: row_count == 0!" ); } else { cmn_params cp; Vector threads; KThread * progress_thread = NULL; uint32_t prefix = 1; multi_progress progress; init_progress_data( &progress, row_count ); VectorInit( &threads, 0, params->num_threads ); init_cmn_params( &cp, params, row_count ); if ( params->show_progress ) rc = start_multi_progress( &progress_thread, &progress ); while ( rc == 0 && cp.first < row_count ) { sorter_params * sp = calloc( 1, sizeof *sp ); if ( sp != NULL ) { init_sorter_params( sp, params, prefix++ ); rc = make_raw_read_iter( &cp, &sp->src ); if ( rc == 0 ) { KThread * thread; if ( params->show_progress ) sp->sort_progress = &progress.progress_rows; rc = KThreadMake( &thread, sort_thread_func, sp ); if ( rc != 0 ) ErrMsg( "KThreadMake( sort-thread #%d ) -> %R", prefix - 1, rc ); else { rc = VectorAppend( &threads, NULL, thread ); if ( rc != 0 ) ErrMsg( "VectorAppend( sort-thread #%d ) -> %R", prefix - 1, rc ); } } cp.first += cp.count; } } join_and_release_threads( &threads ); /* all sorter-threads are done now, tell the progress-thread to terminate! */ join_multi_progress( progress_thread, &progress ); rc = merge_pool_files( params ); } return rc; }
/*
 * Clones "src" with clone_reply_obj and appends the clone to list->v.
 *
 * Returns the clone call's error if cloning fails, otherwise the result of
 * VectorAppend.
 */
static rc_t add_clone_to_reply_obj_list( reply_obj_list * list, const reply_obj * src )
{
    reply_obj * copy;
    rc_t rc = clone_reply_obj( src, &copy );

    if ( rc != 0 )
        return rc;

    return VectorAppend( &list->v, NULL, copy );
}
/*
 * Appends 'A'..'Z' and then '0'..'9' to the vector, printing the vector's
 * contents to stdout (via VectorMap and PrintChar) after each phase.
 */
static void TestAppend(vector *alphabet)
{
    char letter;
    char digit;

    /* Phase 1: the upper-case alphabet. */
    letter = 'A';
    while (letter <= 'Z') {
        VectorAppend(alphabet, &letter);
        letter++;
    }
    fprintf(stdout, "First, here is the alphabet: ");
    VectorMap(alphabet, PrintChar, stdout);

    /* Phase 2: the ten decimal digit characters. */
    for (digit = '0'; digit <= '9'; digit++)
        VectorAppend(alphabet, &digit);

    fprintf(stdout, "\nAfter append digits: ");
    VectorMap(alphabet, PrintChar, stdout);
}
/*
 * helper-function for: col_defs_parse_string / col_defs_extract_from_table
 * - builds a column-definition from the column-name via col_defs_init_col
 * - appends that definition to defs->cols
 *
 * returns an rcMemory/rcExhausted code when no definition could be created,
 * otherwise the result of VectorAppend
 */
static rc_t col_defs_append_col( col_defs* defs, const char* name )
{
    p_col_def created = col_defs_init_col( name );

    if ( created == NULL )
        return RC( rcVDB, rcNoTarg, rcParsing, rcMemory, rcExhausted );

    return VectorAppend( &(defs->cols), NULL, created );
}
/*
 * adds an entry to the redact-val-list
 *
 * the entry is built by redact_val_init from name, len and value and is
 * appended to vals->vals
 *
 * returns an rcMemory/rcExhausted code when the entry could not be created,
 * otherwise the result of VectorAppend
 */
rc_t redact_vals_add( redact_vals* vals, const char* name,
                      const uint32_t len, const char* value )
{
    p_redact_val entry = redact_val_init( name, len, value );

    if ( entry == NULL )
        return RC( rcVDB, rcNoTarg, rcParsing, rcMemory, rcExhausted );

    return VectorAppend( &(vals->vals), NULL, entry );
}
/*
 * Moves every element of "src" to the end of "dst", preserving order.
 *
 * Elements are taken from the front of src one at a time and appended to
 * dst.  The loop ends when src is empty or a vector call fails.
 *
 * Returns 0 on success, otherwise the failing call's return code.
 */
static rc_t rgn_vector_move( Vector * src, Vector * dst )
{
    rc_t rc;

    for ( rc = 0; VectorLength( src ) > 0 && rc == 0; )
    {
        region * front;

        rc = VectorRemove ( src, 0, (void**)&front );
        if ( rc != 0 )
            continue;   /* loop condition ends the walk */

        rc = VectorAppend ( dst, NULL, front );
    }

    return rc;
}
/*
 * Counts one more occurrence of article "a" in the given article vector.
 *
 * If the search (ArticleCompare, from index 0, unsorted) finds the article,
 * the stored copy's numOccurrences is incremented.  Otherwise
 * a->numOccurrences is set to 1 and the article is appended.
 */
static void UpdateOccurences(vector *articles, article *a)
{
    int position = VectorSearch(articles, a, ArticleCompare, 0, false);

    if (position != -1) {
        article *stored = (article *)VectorNth(articles, position);
        stored->numOccurrences++;
        return;
    }

    a->numOccurrences = 1;
    VectorAppend(articles, a);
}
/*
 * Enters the element at elemAddr into the hashset.
 *
 * The element's bucket is selected with h->hashfn.  If the bucket already
 * holds an element that h->comparefn matches, that element is replaced;
 * otherwise the new element is appended to the bucket.
 *
 * Asserts that elemAddr is non-NULL and that the hash code lies in
 * [0, h->numBuckets).
 */
void HashSetEnter(hashset *h, const void *elemAddr)
{
    int hashCode;
    int match;
    vector *bucket;

    assert(elemAddr != NULL && "elemAddr can't be NULL");

    hashCode = h->hashfn(elemAddr, h->numBuckets);
    assert(hashCode >= 0 && hashCode < h->numBuckets && "not possible to insert the specified element into the specified hashset");

    bucket = &h->buckets[hashCode];
    match = VectorSearch(bucket, elemAddr, h->comparefn, 0, false);

    if (match != -1)
        VectorReplace(bucket, elemAddr, match);
    else
        VectorAppend(bucket, elemAddr);
}
/*
 * Counts one occurrence of a word for the given article.
 *
 * counters holds wordCounter records.  If a record for the article is found
 * (FindArticleRecordCmpFn), its count is incremented; otherwise a new record
 * with count 1 is appended.
 */
static void indexWord(vector *counters, articleData *article)
{
    wordCounter key;
    int position;

    key.articleItem = article;
    position = VectorSearch(counters, &key, FindArticleRecordCmpFn, 0, false);

    if (position != -1) {
        wordCounter *existing = VectorNth(counters, position);
        existing->count++;
        return;
    }

    key.count = 1;
    VectorAppend(counters, &key);
}
/*
 * Counts one occurrence of a word for the given article.
 *
 * data holds wordCounter records.  A record matching the article (per
 * FindArticleRecordCmpFn) has its counter incremented; when there is none,
 * a new record with counter 1 is appended.
 */
static void indexWord(vector *data, articleData *article)
{
    wordCounter probe;
    int where;

    probe.article = article;
    where = VectorSearch(data, &probe, FindArticleRecordCmpFn, 0, false);

    if (where != -1) {
        wordCounter *hit = VectorNth(data, where);
        hit->counter++;
        return;
    }

    probe.counter = 1;
    VectorAppend(data, &probe);
}
/*
 * Creates a column definition for "name" (vdcd_init_col with
 * defs->str_limit) and appends it to defs->cols.
 *
 * When the append succeeds, defs->max_colname_chars is raised to the name's
 * size if that is larger than the current value.
 *
 * Returns the created definition (also when the append failed), or NULL if
 * it could not be created.
 */
static p_col_def vdcd_append_col( col_defs* defs, const char* name )
{
    p_col_def created = vdcd_init_col( name, defs->str_limit );

    if ( created != NULL && VectorAppend( &(defs->cols), NULL, created ) == 0 )
    {
        int name_chars = string_size( name );

        if ( defs->max_colname_chars < name_chars )
            defs->max_colname_chars = name_chars;
    }

    return created;
}
/*
 * Enters the element at elemAddr into the hashset.
 *
 * If HashSetLookup finds an existing element, its h->elemSize bytes are
 * overwritten with the new element.  Otherwise the element is appended to
 * the vector returned by HashSetElemVector and h->count is incremented.
 *
 * Asserts that elemAddr is non-NULL.
 */
void HashSetEnter(hashset *h, const void *elemAddr)
{
    void *existing;

    assert(elemAddr!=NULL);

    existing = HashSetLookup(h, elemAddr);
    if (existing == NULL) {
        vector *bucket = HashSetElemVector(h, elemAddr);

        VectorAppend(bucket, elemAddr);
        h->count++;
        return;
    }

    memcpy(existing, elemAddr, h->elemSize);
}
/*
 * Appends the d values (k * n) mod d, for k = 0 .. d-1, to the vector and
 * asserts that the vector then holds exactly d elements.
 *
 * The product is computed in long long before the remainder is taken.
 * Progress messages are written to stdout.
 */
static void InsertPermutationOfNumbers(vector *numbers, long n, long d)
{
    long step = 0;

    fprintf(stdout, "Generating all of the numbers between 0 and %ld (using some number theory). ", d - 1);
    fflush(stdout); /* force echo to the screen... */

    while (step < d) {
        long long product = (long long)step * (long long)n;
        long remainder = (long)(product % d);

        VectorAppend(numbers, &remainder);
        step++;
    }

    assert(VectorLength(numbers) == d);
    fprintf(stdout, "[All done]\n");
    fflush(stdout);
}
/*
 * Allocates a walker_col_ids record and registers it in cursor_id_vector.
 *
 * On success the record is stored in *cursor_ids and 0 is returned.  On
 * failure *cursor_ids is left untouched: an rcMemory/rcExhausted code is
 * returned when the allocation fails, and when VectorAppend fails the
 * record is freed and that call's code is returned.
 */
static rc_t make_cursor_ids( Vector * cursor_id_vector, walker_col_ids ** cursor_ids )
{
    rc_t rc;
    walker_col_ids * fresh = malloc( sizeof * fresh );

    if ( fresh == NULL )
        return RC ( rcApp, rcNoTarg, rcOpening, rcMemory, rcExhausted );

    rc = VectorAppend ( cursor_id_vector, NULL, fresh );
    if ( rc == 0 )
        *cursor_ids = fresh;
    else
        free( fresh );

    return rc;
}
/*
 * Builds the vector of column specs from the command line arguments.
 *
 * The vector is always initialized.  With no columns given the function
 * logs and returns an rcFunction/rcUnsupported error, because the column
 * list cannot be determined automatically.  Otherwise every entry of
 * pb->columns is copied into a heap buffer of strlen + 2 bytes (one byte
 * more than the copy itself needs; the reason is not visible here) and
 * appended to the vector.
 *
 * On any failure in the copy loop the vector is whacked with
 * free_column_spec.  stbl and dtbl are not used by this function.
 *
 * Returns 0 on success, otherwise the first error.
 */
static rc_t get_column_specs ( const vtblcp_parms *pb, Vector *v, const VTable *stbl, VTable *dtbl )
{
    rc_t rc = 0;
    uint32_t idx;

    /* always prepare the vector */
    VectorInit ( v, 0, pb -> column_cnt );

    /* unable at this moment to auto-determine column list */
    if ( pb -> column_cnt == 0 )
    {
        rc = RC ( rcExe, rcSchema, rcEvaluating, rcFunction, rcUnsupported );
        LOGERR ( klogInt, rc, "failed to determine column specs" );
        return rc;
    }

    /* process command line arguments */
    for ( idx = 0; rc == 0 && idx < pb -> column_cnt; ++ idx )
    {
        const char *arg = pb -> columns [ idx ];
        char *spec = malloc ( strlen ( arg ) + 2 );

        if ( spec == NULL )
            rc = RC ( rcExe, rcString, rcAllocating, rcMemory, rcExhausted );
        else
        {
            strcpy ( spec, arg );
            rc = VectorAppend ( v, NULL, spec );
            if ( rc != 0 )
                free ( spec );
        }
    }

    /* failure */
    if ( rc != 0 )
        VectorWhack ( v, free_column_spec, NULL );

    return rc;
}
/*
 * Fetches one article and indexes it unless it was explored before.
 *
 * articleURL   - absolute URL of the article.
 * articleTitle - title used in messages and in the stored article record.
 * allData      - shared state; allData->explored lists explored articles.
 *
 * An article already present in allData->explored (per ArticleCmp) is
 * skipped.  Otherwise the response code decides:
 *   0       - the connection failure is reported;
 *   200     - the article is persisted, appended to explored and scanned;
 *   301/302 - the function recurses on the redirect target;
 *   other   - the failure is reported.
 */
static void ParseArticle(const char *articleURL, const char *articleTitle, rssData *allData)
{
    url u;
    urlconnection urlconn;
    streamtokenizer st;
    int articleIndex;

    URLNewAbsolute(&u, articleURL);

    /* Skip articles that were scanned earlier; only the url needs releasing. */
    article a = {articleURL, articleTitle, u.serverName};
    if (VectorSearch(&allData->explored, &a, ArticleCmp, 0, false) >= 0) {
        printf("[Pass. article already indexed: \"%s\"]\n", articleTitle);
        URLDispose(&u);
        return;
    }

    URLConnectionNew(&urlconn, &u);

    if (urlconn.responseCode == 0) {
        printf("Unable to connect to \"%s\".  Domain name or IP address is nonexistent.\n", articleURL);
    } else if (urlconn.responseCode == 200) {
        printf("Scanning \"%s\" from \"http://%s\"\n", articleTitle, u.serverName);
        STNew(&st, urlconn.dataStream, kTextDelimiters, false);
        PersistArticle(&a, articleURL, articleTitle, u.serverName);
        VectorAppend(&allData->explored, &a);
        articleIndex = VectorLength(&allData->explored) - 1;
        ScanArticle(&st, &a, articleIndex, allData);
        STDispose(&st);
    } else if (urlconn.responseCode == 301 || urlconn.responseCode == 302) {
        /* just pretend we have the redirected URL all along, though index
         * using the new URL and not the old one... */
        ParseArticle(urlconn.newUrl, articleTitle, allData );
    } else {
        printf("Unable to pull \"%s\" from \"%s\". [Response code: %d] Punting...\n", articleTitle, u.serverName, urlconn.responseCode);
    }

    URLConnectionDispose(&urlconn);
    URLDispose(&u);
}
/*
 * Starts a thread that runs PthreadParseArticle for one article.
 *
 * db           - shared database; the new thread's record is appended to
 *                db->threads.
 * articleTitle - title, duplicated for the thread.
 * articleURL   - URL, duplicated for the thread.
 *
 * The thread arguments are heap-allocated and passed to the thread.  If
 * they cannot be allocated, nothing is appended and no thread is started.
 * Failures are reported on stdout.
 */
static void NewThreadParseArticle(rssDatabase *db, const char *articleTitle, const char *articleURL)
{
    threadData newThreadData;

    newThreadData.arg = malloc(sizeof(threadArguments));
    if (newThreadData.arg == NULL) {
        printf("Error, thread cannot be created");
        fflush(stdout);
        return;
    }

    newThreadData.arg->db = db;
    newThreadData.arg->title = strdup(articleTitle);
    newThreadData.arg->URL = strdup(articleURL);
    if (newThreadData.arg->title == NULL || newThreadData.arg->URL == NULL) {
        /* Do not start a thread with half-built arguments. */
        free((void *)newThreadData.arg->title);
        free((void *)newThreadData.arg->URL);
        free(newThreadData.arg);
        printf("Error, thread cannot be created");
        fflush(stdout);
        return;
    }

    /* The record is appended first so that pthread_create can write the
     * thread id directly into the vector's copy. */
    int tID = VectorLength(&db->threads);
    VectorAppend(&db->threads, &newThreadData);
    threadData *threadP = VectorNth(&db->threads, tID);

    if (pthread_create(&threadP->threadID, NULL, PthreadParseArticle, (void *)threadP->arg) != 0) {
        /* NOTE(review): the record stays in db->threads with an unset
         * threadID; confirm that whoever joins these threads copes. */
        printf("Error, thread cannot be created");
        fflush(stdout);
    }
}
/*
 * Records an occurrence of a word for the article with the given index.
 *
 * articlesForWord holds wordcountEntry records.  If an entry for
 * articleIndex exists (per WordcountEntryCmp), its wordcount is incremented.
 * Otherwise a new entry with wordcount 0 is appended.
 *
 * NOTE(review): a first occurrence is stored with wordcount 0, so a word
 * seen k times in an article ends with k - 1.  Confirm whether the readers
 * of wordcount compensate or whether the initial value should be 1.
 */
static void UpdateIndices(vector *articlesForWord, int articleIndex)
{
    wordcountEntry probe;
    int position;

    /* search key, which is also the record appended when nothing matches */
    probe.articleIndex = articleIndex;
    probe.wordcount = 0;

    position = VectorSearch( articlesForWord, &probe, WordcountEntryCmp, 0, false);

    if (position != -1) {
        wordcountEntry *existing = (wordcountEntry*)VectorNth(articlesForWord, position);
        existing->wordcount++;
        return;
    }

    VectorAppend( articlesForWord, &probe);
}
/* AddLib
 *  adds a dynamic library to end of ordered set
 *
 *  "lib" [ IN ] - library returned from KDyldLoadLib
 *
 *  the library is appended to self->ord and then inserted uniquely (ordered
 *  by KDylibSort) into self->name.  if the unique insert fails, NULL is
 *  swapped into the "ord" slot that was just filled and the insert's error
 *  is returned.
 */
static
rc_t KDlsetAddLibInt ( KDlset *self, KDylib *lib )
{
    uint32_t ord_slot;
    void *discarded;
    rc_t rc = VectorAppend ( & self -> ord, & ord_slot, lib );

    if ( rc != 0 )
        return rc;

    rc = VectorInsertUnique ( & self -> name, lib, NULL, KDylibSort );
    if ( rc != 0 )
    {
        /* undo the append: the slot must not keep referring to lib */
        VectorSwap ( & self -> ord, ord_slot, NULL, & discarded );
    }

    return rc;
}
/*
 * Counts one occurrence of "word" in the article (articleTitle, articleURL).
 *
 * Steps:
 *   1. If the word has no wordSet in the hashset yet, one is entered with a
 *      strdup'ed word and an empty occurrence vector.
 *   2. The word's wordSet is looked up again (asserted to exist).
 *   3. If its occurrence vector already has an entry for the article (per
 *      articleCountCompareFn), that entry's count is incremented; otherwise
 *      a new entry with strdup'ed title and url and a count of 1 is appended.
 */
void WordCountEnter(hashset *wordCount, const char *word, const char *articleTitle, const char *articleURL)
{
    wordSet *existingWord;
    int slot;

    /* Step 1: make sure the word has an entry. */
    if (HashSetLookup(wordCount, &word) == NULL) {
        wordSet fresh;

        fresh.word = strdup(word);
        VectorNew(&fresh.occ, sizeof(articleCount), articleCountFreeFn, 25);
        HashSetEnter(wordCount, &fresh);
    }

    /* Step 2: an entry for the word should always exist now. */
    existingWord = (wordSet *) HashSetLookup(wordCount, &word);
    assert(existingWord != NULL);

    /* Step 3: find the article within the word's occurrences. */
    articleCount articleKey = { { (char *) articleTitle, (char *) articleURL }, 1 };
    slot = VectorSearch(&existingWord->occ, &articleKey, articleCountCompareFn, 0, false);

    if (slot != -1) {
        /* word/article pairing exists, increment its count */
        articleCount *known = (articleCount *) VectorNth(&existingWord->occ, slot);
        known->count++;
        return;
    }

    /* word/article pairing is new, append it with a count of 1 */
    articleCount added;
    added.source.title = strdup(articleTitle);
    added.source.url = strdup(articleURL);
    added.count = 1;
    VectorAppend(&existingWord->occ, &added);
}
/*
 * Resolves a list of type names against a schema and collects their type ids.
 *
 * s                - schema used to resolve each name.
 * redactable_types - string that nlt_make_namelist_from_string splits into
 *                    the individual type names.
 * id_vector        - receives one heap-allocated uint32_t per resolved name,
 *                    holding that type's type_id.
 *
 * Processing stops at the first failure.  Ids appended before the failure
 * stay in id_vector.
 *
 * Returns an rcParam/rcNull code if any argument is NULL, otherwise 0 on
 * success or the first error met.
 */
static rc_t redactable_types_2_type_id_vector( const VSchema * s, const char * redactable_types, Vector * id_vector )
{
    const KNamelist *r_types;
    rc_t rc;

    if ( redactable_types == NULL || s == NULL || id_vector == NULL )
        return RC( rcExe, rcNoTarg, rcResolving, rcParam, rcNull );

    rc = nlt_make_namelist_from_string( &r_types, redactable_types );
    if ( rc == 0 )
    {
        uint32_t count, idx;

        rc = KNamelistCount( r_types, &count );
        if ( rc == 0 && count > 0 )
        {
            for ( idx = 0; idx < count && rc == 0; ++idx )
            {
                const char *name;

                rc = KNamelistGet( r_types, idx, &name );
                if ( rc == 0 )
                {
                    VTypedecl td;

                    rc = VSchemaResolveTypedecl ( s, &td, "%s", name );
                    if ( rc == 0 )
                    {
                        uint32_t *id = malloc( sizeof *id );
                        if ( id != NULL )
                        {
                            *id = td.type_id;
                            rc = VectorAppend ( id_vector, NULL, id );
                            /* the vector did not take the id, so it is
                             * still ours to release */
                            if ( rc != 0 )
                                free( id );
                        }
                        else
                            rc = RC( rcExe, rcNoTarg, rcResolving, rcMemory, rcExhausted );
                    }
                }
            }
        }
        KNamelistRelease( r_types );
    }

    return rc;
}