void Testing() { TSymbol p1[] = { 0,1,2,3,0,1,2, 0,1,1,3,0,1,2, 0,1,2,1,0,1,2, 0,1,2,3,1,1,2 }; auto obs = makeSamples(p1, 7, 7*4); auto model = HiddenMarkovModel(); auto top = ForwardTopology(6, 6, false); InitializeHiddenMarkovModelWithTopology(model, top, 4); auto learning = BaumWelchLearning(model, 5e-4, 0); auto rtol = learning.Run(obs); TSymbol test[] = { 0,1,2,3,0,1,2, 0,1,1,3,0,1,2, 3,3,3,3,1,1,1, 2,2,2,2,1,1,1, }; auto test2 = makeSamples(test, 7, 7*4); auto r1 = EvaluateModel(model, test2[0]); auto r2 = EvaluateModel(model, test2[1]); auto r3 = EvaluateModel(model, test2[2]); auto r4 = EvaluateModel(model, test2[3]); const auto TOL = 1e-8; // valores de referencia con Accord.NET assert(fabs(r1-(-3.9364056688035625)) < TOL); assert(fabs(r2-(-5.0350078739166229)) < TOL); assert(fabs(r3-(-32602.524455560128)) < TOL); assert(fabs(r4-(-19547.858747514234)) < TOL); std::cout << "TESTING FINALIZADO CON EXITO" << std::endl; }
void TrainModel() { long a; pthread_t *pt = (pthread_t *)malloc(num_threads * sizeof(pthread_t)); if (model_file[0] == 0) return; int iter = 0; FILE *t1 = fopen(model_file, "rb"); FILE *t2 = fopen(model_file_nnet, "rb"); if(t1 != NULL && t2 != NULL) { fclose(t1); fclose(t2); fprintf(stderr, "Restoring nnet from existing files %s, %s\n", model_file, model_file_nnet); LoadNnet(); } else { LearnVocabFromTrainFile(); if(maxent_hash_size) { maxent_hash_size *= 1000000; maxent_hash_size -= maxent_hash_size % vocab_size; } InitNet(); SaveNnet(); } if(test_file[0] != 0) { counter = 0; real sumlogprob = EvaluateModel(test_file, 1); fprintf(stderr, "Test entropy %f\n", sumlogprob/log10(2)/(real)counter); return; } if(gen > 0) { Sample(gen, 0); return; } else if(gen < 0) { while(1) { Sample(-gen, 1); } return; } fprintf(stderr, "Starting training using file %s\n", train_file); FILE *fi = fopen(valid_file, "rb"); valid_words = 0; while (1) { ReadWordIndex(fi); ++valid_words; if (feof(fi)) break; } valid_file_size = ftell(fi); fclose(fi); real old_entropy = 1e99; real entropy; real diff = 1e99; int retry = 0; int decay = 0; while(retry < max_retry) { if(iter != 0) { if(decay) { alpha /= 2.0; maxent_alpha /= 2.0; } word_count_actual = 0; counter = 0; start = clock(); for (a = 0; a < num_threads; a++) pthread_create(&pt[a], NULL, TrainModelThread, (void *)a); for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL); } fprintf(stderr, "Iteration %d\t", iter); sumlogprob_valid = 0; counter = 0; sumlogprob_valid = EvaluateModel(valid_file, 0); entropy = sumlogprob_valid/log10(2)/(real)counter; fprintf(stderr, "Valid Entropy %f", entropy); ++iter; diff = old_entropy/entropy; if (isnan(entropy) || isinf(entropy) || diff < stop) { if (decay == 1) { ++retry; fprintf(stderr, "\tRetry %d/%d", retry, max_retry); } else { decay = 1; fprintf(stderr, "\tDecay started"); } if(isnan(entropy) || isinf(entropy) || diff < reject_threshold) { fprintf(stderr, "\tNnet rejected"); FreeNnet(); int debug_ = debug_mode; debug_mode = 0; LoadNnet(); debug_mode = debug_; } } fprintf(stderr, "\n"); if(diff > 1.0) { SaveNnet(); old_entropy = entropy; } } }