/*
 * Train the topic model on a single document: for each word, sample a topic
 * assignment from the current model state and record it.
 *
 * Reads and mutates the file-scope model (_static_nTM) and progress bar
 * (_static_progressbar); callers must have initialized both before iterating
 * documents through this callback.
 *
 * words  - array of word indices for this document
 * length - number of entries in +words+
 */
void nTM_each_document(WordIndex *words, unsigned int length) {
  // Increment the total number of documents seen
  _static_nTM->documents++;
  // Create a new SparseCount for N^d_z (count of topics in this document)
  count_list_add(_static_nTM->ndzs);
  // NOTE(review): +i+ is signed while +length+ is unsigned; fine for sane
  // lengths, but the comparison promotes i to unsigned — confirm length can
  // never exceed INT_MAX.
  for (int i=0; i<length; i++) {
    // Sample a topic from P(z|w); documents-1 is the 0-based id of the
    // document we just counted above.
    TopicIndex assigned_topic = nTM_sample_topic(_static_nTM, words[i], _static_nTM->documents-1);
    // If we sampled the "new topic" sentinel (== current topic count),
    // materialize that topic before assigning to it.
    if (assigned_topic == _static_nTM->topics) {
      nTM_create_topic(_static_nTM);
    }
    // NOTE(review): sampling used documents-1 but assignment passes
    // documents — verify nTM_assign_topic expects a 1-based document id,
    // otherwise this is an off-by-one.
    nTM_assign_topic(_static_nTM,words[i],assigned_topic,_static_nTM->documents);
  }
  progressbar_inc(_static_progressbar);
  // Every +interval+ documents: checkpoint the assignments to disk and start
  // a fresh progress bar for the next interval.
  if (_static_nTM->documents % _static_nTM->interval == 0) {
    progressbar_finish(_static_progressbar);
    nTM_save_assignments(_static_nTM);
    _static_progressbar = progressbar_new("Training", _static_nTM->interval);
  }
}
/** *Example for statusbar and progressbar usage **/ int main(void) { // Status bar statusbar *status = statusbar_new("Indeterminate"); for (int i=0; i<30; i++) { usleep(SLEEP_MS); statusbar_inc(status); } statusbar_finish(status); status = statusbar_new("Status bar with a really long label"); for (int i=0; i<10; i++) { usleep(SLEEP_MS); statusbar_inc(status); } statusbar_finish(status); status = statusbar_new_with_format("Custom","(|)|"); for (int i=0; i<30; i++) { usleep(SLEEP_MS); statusbar_inc(status); } statusbar_finish(status); // Progress bar int max = 240; progressbar *progress = progressbar_new("Smooth",max); for(int i=0;i<max;i++) { usleep(SLEEP_MS); progressbar_inc(progress); } progressbar_finish(progress); progress = progressbar_new("Three Second Task with a long label",3); for(int i=0;i<3;i++) { progressbar_inc(progress); sleep(1); } progressbar_finish(progress); progress = progressbar_new("Fast",100); for(int i=0;i<100;i++) { usleep(SLEEP_MS); progressbar_inc(progress); } progressbar_finish(progress); }
/*
 * Train a CW model: first build the per-target representations by walking the
 * corpus once, then run a fixed number of clustering iterations, and finally
 * persist the learned target wordmap.
 *
 * Publishes +model+ through the file-scope static_cw_model so that the
 * per-document callback (CW_each_document) can reach it; the static is
 * cleared again before saving.
 */
void CW_train(CW *model) {
  /* Seed the RNG once for the whole training run. */
  srand(time(NULL));

  static_cw_model = model;
  static_progress = progressbar_new("Building Representations", model->corpus->document_count);
  target_corpus_each_document(model->corpus, &CW_each_document);
  progressbar_finish(static_progress);

  /* Fixed iteration budget for the clustering phase. */
  const int iteration_count = 50;
  progressbar *iteration_progress = progressbar_new("Iterating", iteration_count);
  int iteration = 0;
  while (iteration < iteration_count) {
    CW_iteration(model);
    progressbar_inc(iteration_progress);
    iteration++;
  }
  progressbar_finish(iteration_progress);

  static_cw_model = NULL;
  CW_save_target_wordmap(model);
}
void CW_each_document(unsigned int target, unsigned int *words, unsigned int length) { if (static_cw_model->skip_documents > 0) { static_cw_model->skip_documents--; static_cw_model->document_index++; return; } if(length < 1) return; // Look up the target index for this +target+ hash_element *element = hash_get(static_cw_model->wordmap_to_target, target); int index = 0; // +index+ is the index in model->targets for this target if (element != NULL) { // Success! We've seen this word before. index = element->value; } else { // +target+ is a new word, so add it. index = static_cw_model->num_targets++; int category = static_cw_model->num_categories++; hash_add(static_cw_model->wordmap_to_target, target, index); unsigned_array_set(static_cw_model->assignments, index, category); static_cw_model->targets[index] = hash_new(32); } // Update the frequency counts for this target for (int i=0; i<length; i++) { //model->targets[i] is f_ij, where i is the target word and j is the context word hash_update(static_cw_model->targets[index], words[i], 1); static_cw_model->f_xx++; // Also update the frequency count for this context word hash_update(static_cw_model->context_counts, words[i], 1); } progressbar_inc(static_progress); static_cw_model->document_index++; }