예제 #1
0
void EvaluateModel(
    const Config& config, const model& nn, propagator& prop_validation,
    const shared_ptr<Corpus>& test_corpus, const shared_ptr<Vocabulary>& vocab,
    double& current_learning_rate, double& current_validation_ll) {
  if (test_corpus->size() > 0) {
    double log_likelihood = 0.0;
    Matrix<double,Dynamic,Dynamic> scores(vocab->size(), config.minibatch_size);
    Matrix<double,Dynamic,Dynamic> output_probs(vocab->size(), config.minibatch_size);
    MinibatchExtractor extractor(test_corpus, vocab, config);

    cerr << endl;
    cerr << "Validation minibatches: " << endl;
    int num_batches = (test_corpus->size() - 1) / config.minibatch_size + 1;
    for (int batch = 0; batch < num_batches; batch++) {
      if (batch % 50 == 0) {
        cerr << batch << "... ";
      }

      data_size_t start_index = config.minibatch_size * batch;
      MatrixInt minibatch = extractor.extract(start_index);

      prop_validation.fProp(minibatch.topRows(config.ngram_size - 1));

      // Do full forward prop through output word embedding layer
      start_timer(4);
      prop_validation.output_layer_node.param->fProp(prop_validation.second_hidden_activation_node.fProp_matrix, scores);
      stop_timer(4);

      // And softmax and loss. Be careful of short minibatch
      double minibatch_log_likelihood;
      start_timer(5);
      SoftmaxLogLoss().fProp(
          scores.leftCols(minibatch.cols()),
          minibatch.row(config.ngram_size - 1),
          output_probs,
          minibatch_log_likelihood);
      stop_timer(5);
      log_likelihood += minibatch_log_likelihood;
    }

    cerr << endl;
    cerr << "Validation log-likelihood: " << log_likelihood << endl;
    cerr << "           perplexity:     " << exp(-log_likelihood / test_corpus->size()) << endl;

    // If the validation perplexity decreases, halve the learning rate.
    if (current_validation_ll != 0 && log_likelihood < current_validation_ll) {
      current_learning_rate /= 2;
    } else {
      current_validation_ll = log_likelihood;

      if (config.model_output_file != "") {
        cerr << "Writing model to " << config.model_output_file << endl;
        ofstream fout(config.model_output_file);
        nn.write(fout);
        vocab->write(fout);
        cerr << "Done writing model" << endl;
      }
    }
  }
}
예제 #2
0
void compute_validation_perplexity(int ngram_size, int output_vocab_size, int validation_minibatch_size, int validation_data_size, int num_validation_batches, param & myParam, propagator & prop_validation, Map< Matrix<int,Dynamic,Dynamic> > & validation_data, double & current_learning_rate, double & current_validation_ll)
{
    double log_likelihood = 0.0;

    Matrix<double,Dynamic,Dynamic> scores(output_vocab_size, validation_minibatch_size);
    Matrix<double,Dynamic,Dynamic> output_probs(output_vocab_size, validation_minibatch_size);
    Matrix<int,Dynamic,Dynamic> minibatch(ngram_size, validation_minibatch_size);

    for (int validation_batch =0;validation_batch < num_validation_batches;validation_batch++)
    {
        int validation_minibatch_start_index = validation_minibatch_size * validation_batch;
        int current_minibatch_size = min(validation_minibatch_size,
                                          validation_data_size - validation_minibatch_start_index);
        minibatch.leftCols(current_minibatch_size) = validation_data.middleCols(validation_minibatch_start_index,
                                                                                current_minibatch_size);
        prop_validation.fProp(minibatch.topRows(ngram_size-1));

        // Do full forward prop through output word embedding layer
        start_timer(4);
        if (prop_validation.skip_hidden)
            prop_validation.output_layer_node.param->fProp(prop_validation.first_hidden_activation_node.fProp_matrix, scores);
        else
            prop_validation.output_layer_node.param->fProp(prop_validation.second_hidden_activation_node.fProp_matrix, scores);
        stop_timer(4);

        // And softmax and loss. Be careful of short minibatch
        double minibatch_log_likelihood;
        start_timer(5);
        SoftmaxLogLoss().fProp(scores.leftCols(current_minibatch_size),
                                minibatch.row(ngram_size-1),
                                output_probs,
                                minibatch_log_likelihood);
        stop_timer(5);
        log_likelihood += minibatch_log_likelihood;
    }

    cerr << "Validation log-likelihood: "<< log_likelihood << '\n';
    cerr << "           perplexity:     "<< exp(-log_likelihood/validation_data_size) << '\n';

    // If the validation perplexity decreases, halve the learning rate.
    if (current_validation_ll != 0.0 && log_likelihood < current_validation_ll && myParam.parameter_update != "ADA")
    {
        current_learning_rate /= 2;
    }
    current_validation_ll = log_likelihood;
}