Example #1
#include <cassert>
#include <cmath>
#include <iostream>

// TSymbol, HiddenMarkovModel, ForwardTopology, BaumWelchLearning, makeSamples
// and EvaluateModel come from the project's HMM headers (not shown here).
void Testing()
{
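	// Training data: four sequences of length 7 over the alphabet {0,1,2,3};
	// each of the last three differs from the first in a single symbol.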
	TSymbol p1[] = 
	{ 
		0,1,2,3,0,1,2,
		0,1,1,3,0,1,2,
		0,1,2,1,0,1,2,
		0,1,2,3,1,1,2 
	};
	auto obs = makeSamples(p1, 7, 7*4);

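	// Build a 6-state forward-topology HMM over the 4-symbol alphabet and
	// fit it to the training sequences with Baum-Welch (tolerance 5e-4).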
	auto model = HiddenMarkovModel();
	auto top = ForwardTopology(6, 6, false);
	InitializeHiddenMarkovModelWithTopology(model, top, 4);
	auto learning = BaumWelchLearning(model, 5e-4, 0);
	auto rtol = learning.Run(obs);

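	// The first two test sequences follow the training patterns; the last
	// two deviate strongly and should receive far lower log-likelihoods.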
	TSymbol test[] = 
	{
		0,1,2,3,0,1,2,
		0,1,1,3,0,1,2,
		3,3,3,3,1,1,1,
		2,2,2,2,1,1,1,
	};
	auto test2 = makeSamples(test, 7, 7*4);
	auto r1 = EvaluateModel(model, test2[0]);
	auto r2 = EvaluateModel(model, test2[1]);
	auto r3 = EvaluateModel(model, test2[2]);
	auto r4 = EvaluateModel(model, test2[3]);
	const auto TOL = 1e-8;
	// reference values computed with Accord.NET
	assert(fabs(r1-(-3.9364056688035625)) < TOL);
	assert(fabs(r2-(-5.0350078739166229)) < TOL);
	assert(fabs(r3-(-32602.524455560128)) < TOL);
	assert(fabs(r4-(-19547.858747514234)) < TOL);
	std::cout << "TESTING FINISHED SUCCESSFULLY" << std::endl;
}
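
Note: makeSamples itself is not part of this listing. The sketch below only illustrates the behavior the call makeSamples(p1, 7, 7*4) appears to rely on, namely splitting a flat symbol array into fixed-length sequences; the signature and return type are assumptions, not the project's actual API.

#include <cstddef>
#include <vector>

// Hypothetical stand-in for the makeSamples helper used above: split a flat
// array of `total` symbols into sequences of `len` symbols each.
template <typename TSymbol>
std::vector<std::vector<TSymbol>> makeSamples(const TSymbol *data, std::size_t len, std::size_t total)
{
	std::vector<std::vector<TSymbol>> samples;
	for (std::size_t i = 0; i + len <= total; i += len)
		samples.emplace_back(data + i, data + i + len);
	return samples;
}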
Example #2
void TrainModel() {
  long a;
  if (model_file[0] == 0) return;
  pthread_t *pt = (pthread_t *)malloc(num_threads * sizeof(pthread_t));
  int iter = 0;

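  // Resume from existing model files when both are present;
  // otherwise build the vocabulary from scratch and initialize a fresh net.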
  FILE *t1 = fopen(model_file, "rb");
  FILE *t2 = fopen(model_file_nnet, "rb");
  if(t1 != NULL && t2 != NULL) {
    fclose(t1);
    fclose(t2);
    fprintf(stderr, "Restoring nnet from existing files %s, %s\n", model_file, model_file_nnet);
    LoadNnet();
  } else {
    LearnVocabFromTrainFile();
    if(maxent_hash_size) {
      maxent_hash_size *= 1000000;
      maxent_hash_size -= maxent_hash_size % vocab_size;
    }
    InitNet();
    SaveNnet();
  } 

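  // Test-only mode: report entropy on the test set and exit.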
  if(test_file[0] != 0) {
    counter = 0;
    real sumlogprob = EvaluateModel(test_file, 1);
    fprintf(stderr, "Test entropy %f\n", sumlogprob/log10(2)/(real)counter);
    free(pt);
    return;
  }

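  // Generation mode: positive gen samples that many words once;
  // negative gen keeps sampling indefinitely.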
  if(gen > 0) {
    Sample(gen, 0);
    free(pt);
    return;
  } else if(gen < 0) {
    while(1) {
      Sample(-gen, 1);
    }
  }

  fprintf(stderr, "Starting training using file %s\n", train_file);

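  // Count the words in the validation file and record its size.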
  FILE *fi = fopen(valid_file, "rb");
  if (fi == NULL) {
    fprintf(stderr, "Cannot open validation file %s\n", valid_file);
    free(pt);
    return;
  }
  valid_words = 0;
  while (1) {
    ReadWordIndex(fi);
    ++valid_words;
    if (feof(fi)) break;
  }
  valid_file_size = ftell(fi);
  fclose(fi);

  real old_entropy = 1e99;
  real entropy;
  real diff = 1e99;
  int retry = 0;
  int decay = 0;
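  // Training loop: each iteration (after iteration 0, which only evaluates
  // the initial net) runs the worker threads over the training file, then
  // checks validation entropy; learning rates are halved once improvement
  // stalls, and training stops after max_retry failed retries.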
  while(retry < max_retry) {
    if(iter != 0) {
      if(decay) {
        alpha /= 2.0;
        maxent_alpha /= 2.0;
      }
      word_count_actual = 0;
      counter = 0;
      start = clock();
      for (a = 0; a < num_threads; a++) pthread_create(&pt[a], NULL, TrainModelThread, (void *)a);
      for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL);
    }
    fprintf(stderr, "Iteration %d\t", iter);
    sumlogprob_valid = 0;
    counter = 0;
    sumlogprob_valid = EvaluateModel(valid_file, 0);
    entropy = sumlogprob_valid/log10(2)/(real)counter;
    fprintf(stderr, "Valid Entropy %f", entropy);
    ++iter;

    // diff > 1.0 means validation entropy improved this iteration.
    diff = old_entropy/entropy;
    if (isnan(entropy) || isinf(entropy) || diff < stop) {
      if (decay == 1) {
        ++retry;
        fprintf(stderr, "\tRetry %d/%d", retry, max_retry);
      } else {
        decay = 1;
        fprintf(stderr, "\tDecay started");
      }
      if(isnan(entropy) || isinf(entropy) || diff < reject_threshold) {
        // Diverged too far: drop the current weights and reload the last saved net.
        fprintf(stderr, "\tNnet rejected");
        FreeNnet();
        int debug_ = debug_mode;
        debug_mode = 0;
        LoadNnet();
        debug_mode = debug_;
      }
    }
    fprintf(stderr, "\n");

    if(diff > 1.0) {
      // Entropy improved: checkpoint the net and remember the new best value.
      SaveNnet();
      old_entropy = entropy;
    }
  }
  free(pt);
}
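
Note: TrainModelThread is referenced above but not shown. The sketch below illustrates the usual word2vec-style worker pattern that the pthread_create call (which passes the thread index through the void * argument) suggests; the extern declarations and train_file_size are assumptions about the surrounding globals, and the actual training step is elided.

#include <stdio.h>

extern char train_file[];           // assumed globals, as used in TrainModel
extern long long train_file_size;
extern int num_threads;

// Hypothetical worker: recover the thread id from the void * argument and
// process a disjoint slice of the training file.
void *TrainModelThread(void *id) {
  long thread_id = (long)id;
  FILE *fi = fopen(train_file, "rb");
  if (fi == NULL) return NULL;
  // Seek to this thread's slice of the file.
  fseek(fi, (long)(train_file_size / num_threads * thread_id), SEEK_SET);
  // ... read words and update the model until the slice is exhausted ...
  fclose(fi);
  return NULL;
}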