void EvaluateModel(const Config &config,
                   const model &nn,
                   propagator &prop_validation,
                   const shared_ptr<Corpus> &test_corpus,
                   const shared_ptr<Vocabulary> &vocab,
                   double &current_learning_rate,
                   double &current_validation_ll) {
    if (test_corpus->size() == 0) return;

    double log_likelihood = 0.0;
    Matrix<double,Dynamic,Dynamic> scores(vocab->size(), config.minibatch_size);
    Matrix<double,Dynamic,Dynamic> output_probs(vocab->size(), config.minibatch_size);
    MinibatchExtractor extractor(test_corpus, vocab, config);

    cerr << endl;
    cerr << "Validation minibatches: " << endl;
    int num_batches = (test_corpus->size() - 1) / config.minibatch_size + 1;
    for (int batch = 0; batch < num_batches; batch++) {
        if (batch % 50 == 0) {
            cerr << batch << "... ";
        }

        data_size_t start_index = config.minibatch_size * batch;
        MatrixInt minibatch = extractor.extract(start_index);

        // Forward prop the context words (all rows but the last) through the network.
        prop_validation.fProp(minibatch.topRows(config.ngram_size - 1));

        // Do full forward prop through output word embedding layer.
        start_timer(4);
        prop_validation.output_layer_node.param->fProp(
            prop_validation.second_hidden_activation_node.fProp_matrix, scores);
        stop_timer(4);

        // And softmax and loss. Be careful of a short final minibatch.
        double minibatch_log_likelihood;
        start_timer(5);
        SoftmaxLogLoss().fProp(scores.leftCols(minibatch.cols()),
                               minibatch.row(config.ngram_size - 1),
                               output_probs,
                               minibatch_log_likelihood);
        stop_timer(5);
        log_likelihood += minibatch_log_likelihood;
    }

    cerr << endl;
    cerr << "Validation log-likelihood: " << log_likelihood << endl;
    cerr << "           perplexity:     " << exp(-log_likelihood / test_corpus->size()) << endl;

    // If the validation log-likelihood got worse (i.e. perplexity increased),
    // halve the learning rate; otherwise record the new best score and save the model.
    if (current_validation_ll != 0 && log_likelihood < current_validation_ll) {
        current_learning_rate /= 2;
    } else {
        current_validation_ll = log_likelihood;
        if (config.model_output_file != "") {
            cerr << "Writing model to " << config.model_output_file << endl;
            ofstream fout(config.model_output_file);
            nn.write(fout);
            vocab->write(fout);
            cerr << "Done writing model" << endl;
        }
    }
}
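// --- Illustration (not part of the original sources) ----------------------
// Both validation routines delegate the loss computation to
// SoftmaxLogLoss().fProp. Below is a minimal, self-contained sketch of what
// that step is assumed to do: a numerically stable column-wise log-softmax
// over the scores, plus the summed log-probability of each column's gold
// output word. The function name and signature here are illustrative
// assumptions, not the library's API.
#include <Eigen/Dense>
#include <cmath>

// scores: vocab_size x minibatch_size, one column of unnormalized logits per example.
// output_words: gold word index for each column.
// log_probs (out): log-probabilities, same shape as scores.
// log_likelihood (out): sum over columns of the gold word's log-probability.
inline void SketchSoftmaxLogLoss(const Eigen::MatrixXd &scores,
                                 const Eigen::RowVectorXi &output_words,
                                 Eigen::MatrixXd &log_probs,
                                 double &log_likelihood) {
    log_probs.resize(scores.rows(), scores.cols());
    log_likelihood = 0.0;
    for (int j = 0; j < scores.cols(); ++j) {
        // Subtract the column max before exponentiating, for numerical stability.
        double max_score = scores.col(j).maxCoeff();
        double log_z = std::log((scores.col(j).array() - max_score).exp().sum()) + max_score;
        log_probs.col(j).array() = scores.col(j).array() - log_z;
        // Accumulate the log-probability assigned to the observed word.
        log_likelihood += log_probs(output_words(j), j);
    }
}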
void compute_validation_perplexity(int ngram_size,
                                   int output_vocab_size,
                                   int validation_minibatch_size,
                                   int validation_data_size,
                                   int num_validation_batches,
                                   param &myParam,
                                   propagator &prop_validation,
                                   Map< Matrix<int,Dynamic,Dynamic> > &validation_data,
                                   double &current_learning_rate,
                                   double &current_validation_ll) {
    double log_likelihood = 0.0;
    Matrix<double,Dynamic,Dynamic> scores(output_vocab_size, validation_minibatch_size);
    Matrix<double,Dynamic,Dynamic> output_probs(output_vocab_size, validation_minibatch_size);
    Matrix<int,Dynamic,Dynamic> minibatch(ngram_size, validation_minibatch_size);

    for (int validation_batch = 0; validation_batch < num_validation_batches; validation_batch++) {
        int validation_minibatch_start_index = validation_minibatch_size * validation_batch;
        // The final minibatch may be shorter than validation_minibatch_size.
        int current_minibatch_size = min(validation_minibatch_size,
                                         validation_data_size - validation_minibatch_start_index);
        minibatch.leftCols(current_minibatch_size) =
            validation_data.middleCols(validation_minibatch_start_index, current_minibatch_size);

        // Forward prop the context words (all rows but the last) through the network.
        prop_validation.fProp(minibatch.topRows(ngram_size - 1));

        // Do full forward prop through output word embedding layer, reading from
        // whichever hidden layer feeds the output.
        start_timer(4);
        if (prop_validation.skip_hidden)
            prop_validation.output_layer_node.param->fProp(
                prop_validation.first_hidden_activation_node.fProp_matrix, scores);
        else
            prop_validation.output_layer_node.param->fProp(
                prop_validation.second_hidden_activation_node.fProp_matrix, scores);
        stop_timer(4);

        // And softmax and loss. Be careful of a short final minibatch.
        double minibatch_log_likelihood;
        start_timer(5);
        SoftmaxLogLoss().fProp(scores.leftCols(current_minibatch_size),
                               minibatch.row(ngram_size - 1),
                               output_probs,
                               minibatch_log_likelihood);
        stop_timer(5);
        log_likelihood += minibatch_log_likelihood;
    }

    cerr << "Validation log-likelihood: " << log_likelihood << '\n';
    cerr << "           perplexity:     " << exp(-log_likelihood / validation_data_size) << '\n';

    // If the validation log-likelihood got worse (i.e. perplexity increased),
    // halve the learning rate; skipped under Adagrad, which adapts its own step sizes.
    if (current_validation_ll != 0.0 &&
        log_likelihood < current_validation_ll &&
        myParam.parameter_update != "ADA") {
        current_learning_rate /= 2;
    }
    current_validation_ll = log_likelihood;
}
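// --- Illustration (not part of the original sources) ----------------------
// A minimal sketch of the validation-based learning-rate schedule shared by
// both functions above: when the validation log-likelihood stops improving,
// halve the learning rate. It mirrors the first variant, which records the
// best score (and saves the model) only on improvement; the second variant
// instead overwrites current_validation_ll unconditionally and skips the
// decay under Adagrad. The struct and names below are illustrative
// assumptions, not part of the original code.
#include <iostream>

struct ValidationSchedule {
    double best_log_likelihood = 0.0;  // 0.0 means "no validation run yet"

    // Returns true if this epoch's model improved and should be saved.
    bool update(double log_likelihood, double &learning_rate) {
        if (best_log_likelihood != 0.0 && log_likelihood < best_log_likelihood) {
            learning_rate /= 2;  // worse than before: decay the step size
            return false;
        }
        best_log_likelihood = log_likelihood;  // improved (or first run): keep it
        return true;
    }
};

int main() {
    ValidationSchedule schedule;
    double lr = 1.0;
    // Simulated per-epoch validation log-likelihoods (less negative is better).
    for (double ll : {-9000.0, -8500.0, -8600.0, -8400.0}) {
        bool improved = schedule.update(ll, lr);
        std::cout << "ll=" << ll << " lr=" << lr
                  << (improved ? " (save model)\n" : " (halve lr)\n");
    }
}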