void FNeuralNetLMBase::BatchSGDTrain(FNNLMDataReader &train_data,
                                     FNNLMDataReader &validation_data,
                                     const string &outbase, bool nce_ppl) {
  const size_t eos_widx = word_vocab_.eos_idx();
  const size_t unk_widx = word_vocab_.unk_idx();
  const vector<size_t> eos_fidx = { factor_vocab_.eos_idx() };

  vector<pair<size_t, vector<size_t>>> sentence;

  double last_logp = -numeric_limits<double>::max();
  double curr_logp = -numeric_limits<double>::max();
  bool halve_alpha = false;
  // Set the current learning rate.
  float curr_learning_rate = algopts_.init_learning_rate_;
  size_t sents_processed = 0;

  int iteration = 0;
  clock_t start_time = clock();
  clock_t end_time = start_time;
  while (true) {
    cout << "******************************* ITERATION " << iteration++
         << " *******************************" << endl;

    train_data.StartEpoch();
    ResetActivations();
    cout << "learning_rate = " << curr_learning_rate << endl;

    int bpos = 0;
    double logp = 0.0;
    nce_obj_ = 0;
    size_t ivcount = 0;
    size_t oovcount = 0;
    // NOTE: the vector "sentence" does not include </s> at the end!
    while (train_data.GetSentence(sentence)) {
      assert(!sentence.empty());
      if (independent_) {
        ResetActivations();
      }
      ForwardPropagate(eos_widx, eos_fidx);
      for (vector<pair<size_t, vector<size_t>>>::const_iterator it = sentence.begin();
           it != sentence.end(); ++it) {
        // Train on every word, even an OOV, since <unk> is in the vocabulary;
        // OOVs only count toward the log-likelihood when <unk> is modeled.
        if (!unk_ && it->first == unk_widx) {
          oovcount++;
        } else {
          logp += GetLogProb(it->first, !nce_);
          ivcount++;
        }
        BackPropagate(it->first, it->second);
        if (++bpos == algopts_.batch_size_) {
          FastUpdateWeightsMajor(curr_learning_rate);
          bpos = 0;
        }
        ForwardPropagate(it->first, it->second);
      }
      // Score and back-propagate the implicit </s> at the end of the sentence.
      logp += GetLogProb(eos_widx, !nce_);
      ivcount++;
      BackPropagate(eos_widx, eos_fidx);

      sents_processed++;
      if ((sents_processed % 500) == 0) {
        cout << "." << flush;
      }
    }
    // Flush the updates accumulated since the last full minibatch.
    FastUpdateWeightsMajor(curr_learning_rate);
    bpos = 0;
    FastUpdateWeightsMinor();

    cout << "\nnum IV words (including </s>) in training: " << ivcount << endl;
    cout << "number of OOV words in training: " << oovcount << endl;
    if (!nce()) {
      cout << "training entropy (base 2): " << -logp / log(2) / ivcount << endl;
      cout << "model perplexity on training: " << exp(-logp / ivcount) << endl;
      cout << "log-likelihood (base e) on training is: " << logp << endl;
    } else {
      cout << "NCE objective value on training is: " << nce_obj_ << endl;
      cout << "un-normalized training entropy (base 2): " << -logp / log(2) / ivcount << endl;
      cout << "un-normalized model perplexity on training: " << exp(-logp / ivcount) << endl;
      cout << "un-normalized log-likelihood (base e) on training is: " << logp << endl;
    }
    cout << "epoch finished" << endl << flush;

    if (!outbase.empty() && debug_ > 0) {
      WriteLM(outbase + ".ITER_" + to_string(iteration - 1));
    }

    cout << "----------VALIDATION----------" << endl;
    curr_logp = EvalLM(validation_data, nce_ppl);
    cout << "log-likelihood (base e) on validation is: " << curr_logp << endl;

    clock_t last_end_time = end_time;
    end_time = clock();
    cout << "time elapsed "
         << static_cast<double>(end_time - last_end_time) / CLOCKS_PER_SEC
         << " secs for this iteration out of "
         << static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC
         << " secs in total." << endl;

    if (curr_logp < last_logp) {
      cout << "validation log-likelihood decreased; restoring previous parameters" << endl;
      RestoreLastParams();
    } else {
      CacheCurrentParams();
    }
    // Once the relative validation improvement falls below min_improvement_,
    // start halving the learning rate; the next time it falls below the
    // threshold, write the model and stop.
    if (curr_logp * algopts_.min_improvement_ <= last_logp) {
      if (!halve_alpha) {
        halve_alpha = true;
      } else {
        if (!outbase.empty()) {
          WriteLM(outbase);
        }
        break;
      }
    }
    if (halve_alpha) {
      curr_learning_rate /= 2;
    }
    last_logp = curr_logp;
  }
}
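// Usage sketch (illustrative, not part of this file): FNeuralNetLMBase is
// used polymorphically above, so the concrete subclass "FMyNNLM" and the
// data-reader construction below are assumptions; only BatchSGDTrain's
// signature comes from this file.
//
//   FNNLMDataReader train_data(/* training corpus, vocabularies, ... */);
//   FNNLMDataReader validation_data(/* validation corpus, ... */);
//   FMyNNLM lm;  // hypothetical concrete subclass of FNeuralNetLMBase
//   lm.BatchSGDTrain(train_data, validation_data, "final.model",
//                    false /* nce_ppl, forwarded to EvalLM */);
//
// One call runs complete epochs until the validation log-likelihood stops
// improving by the factor algopts_.min_improvement_, halving the learning
// rate along the way and writing the model to "final.model" before returning.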