Example #1
lOOOOOOOOOl
(
lllO\
l\
OOO\
Ol
)
{
Cl\
ea\
r\

(
)
;
lO\
OlO\
l\
OO\
OOO
::
Cr\
eat\
eIns\
tance\

(
)
;
lOOl\
OlOOOOO
::
Ge\
t\
Ins\
ta\
nce
(
)
.
lO\
OlOl\
OOOlO
(
this\

)
;
lO\
O\
lOl\
OOO\
ll
=
0
;
}
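
For reference, once the backslash line splices are resolved, the fragment above reads as follows (indentation is mine; the l/O identifiers are reproduced verbatim):

lOOOOOOOOOl(lllOlOOOOl) {
    Clear();
    lOOlOlOOOOO::CreateInstance();
    lOOlOlOOOOO::GetInstance().lOOlOlOOOlO(this);
    lOOlOlOOOll = 0;
}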
Example #2
void FNeuralNetLMBase::BatchSGDTrain(FNNLMDataReader &train_data, FNNLMDataReader &validation_data,
                                     const string &outbase, bool nce_ppl) {
  const size_t eos_widx = word_vocab_.eos_idx();
  const size_t unk_widx = word_vocab_.unk_idx();
  const vector<size_t> eos_fidx = { factor_vocab_.eos_idx() };

  vector<pair<size_t, vector<size_t>>> sentence;

  double last_logp = -numeric_limits<double>::max();
  double curr_logp = -numeric_limits<double>::max();
  bool halve_alpha = false;
  // Initialize the learning rate for the first epoch.
  float curr_learning_rate = algopts_.init_learning_rate_;

  size_t sents_processed = 0;
  int iteration = 0;

  clock_t start_time = clock();
  clock_t end_time = start_time;
  while (true) {
    cout << "******************************* ITERATION " << iteration++ << " *******************************" << endl;

    train_data.StartEpoch();

    ResetActivations();

    cout << "learning_rate = " << curr_learning_rate << endl;

    int bpos = 0;
    double logp = 0.0;
    nce_obj_ = 0;
    size_t ivcount = 0;
    size_t oovcount = 0;
    // NOTE: the vector "sentence" does not include </s> at the end!
    while (train_data.GetSentence(sentence)) {
      assert(!sentence.empty());

      if (independent_) {
        ResetActivations();
      }
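      // Feed </s> first: the sentence-boundary token doubles as the start-of-sentence context.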
      ForwardPropagate(eos_widx, eos_fidx);
      for (vector<pair<size_t, vector<size_t>>>::const_iterator it = sentence.begin(); it != sentence.end(); ++it) {
        // Train on every word, OOVs included, since <unk> is itself in the
        // vocabulary; OOVs are only excluded from the log-prob tally.
        if (!unk_ && it->first == unk_widx) {
          oovcount++;
        } else {
          logp += GetLogProb(it->first, !nce_);
          ivcount++;
        }
        BackPropagate(it->first, it->second);
        if (++bpos == algopts_.batch_size_) {
          FastUpdateWeightsMajor(curr_learning_rate);
          bpos = 0;
        }
        ForwardPropagate(it->first, it->second);
      }
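      // Score and train the trailing </s>, which GetSentence() does not include.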
      if (nce_) {
        logp += GetLogProb(eos_widx, false);
      } else {
        logp += GetLogProb(eos_widx, true);
      }
      ivcount++;
      BackPropagate(eos_widx, eos_fidx);

      sents_processed++;
      if ((sents_processed % 500) == 0)
        cout << "." << flush;
    }
    // Flush the updates accumulated since the last full minibatch of this epoch.
    FastUpdateWeightsMajor(curr_learning_rate);
    bpos = 0;
    FastUpdateWeightsMinor();

    cout << "\nnum IV words (including </s>) in training: " << ivcount << endl;
    cout << "number of OOV words in training: " << oovcount << endl;
    if (!nce_) {
      cout << "training entropy (base 2): " << -logp / log(2) / ivcount << endl;
      cout << "model perplexity on training: " << exp(-logp / ivcount) << endl;
      cout << "log-likelihood (base e) on training is: " << logp << endl;
    } else {
      cout << "NCE objective value on training is: " << nce_obj_ << endl;
      cout << "un-normalized training entropy (base 2): " << -logp / log(2) / ivcount << endl;
      cout << "unnormalied model perplexity on training: " << exp(-logp / ivcount) << endl;
      cout << "un-normalized log-likelihood (base e) on training is: " << logp << endl;
    }
    cout << "epoch finished" << endl << flush;

    if (!outbase.empty()) {
      if (debug_ > 0) {
        WriteLM(outbase + ".ITER_" + to_string(iteration - 1));
      }
    }

    cout << "----------VALIDATION----------" << endl;
    curr_logp = EvalLM(validation_data, nce_ppl);
    cout << "log-likelihood (base e) on validation is: " << curr_logp << endl;

    clock_t last_end_time = end_time;
    end_time = clock();
    cout << "time elasped " 
        << static_cast<double>(end_time - last_end_time) / CLOCKS_PER_SEC << " secs for this iteration out of "
        << static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC << " secs in total." << endl;

    if (curr_logp < last_logp) {
      cout << "validation log-likelihood decrease; resetting parameters" << endl;
      RestoreLastParams();
    } else {
      CacheCurrentParams();
    }

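    // Too little relative improvement: halve the learning rate first; stop on the second stall.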
    if (curr_logp * algopts_.min_improvement_ <= last_logp) {
      if (!halve_alpha) {
        halve_alpha = true;
      } else {
        if (!outbase.empty()) {
          WriteLM(outbase);
        }
        break;
      }
    }

    if (halve_alpha) {
      curr_learning_rate /= 2;
    }

    last_logp = curr_logp;
  }
}
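
The epoch loop above is the classic RNNLM-style schedule: keep the learning rate while the validation log-likelihood improves by at least the factor algopts_.min_improvement_, halve it once improvement stalls, and stop the first time the halved rate stalls again. Below is a minimal standalone sketch of just that control flow; LRSchedule, Update, and min_improvement are illustrative names, not part of this codebase:

#include <iostream>
#include <limits>

// Minimal sketch of the epoch-level schedule in BatchSGDTrain above.
// min_improvement plays the role of algopts_.min_improvement_; all
// names here are illustrative assumptions, not the toolkit's API.
struct LRSchedule {
  double last_logp = -std::numeric_limits<double>::max();
  bool halve_alpha = false;
  float learning_rate;
  double min_improvement;

  LRSchedule(float lr, double min_imp)
      : learning_rate(lr), min_improvement(min_imp) {}

  // Call once per epoch with the validation log-likelihood (base e, so
  // negative). Returns false when training should stop.
  bool Update(double curr_logp) {
    // logp is negative, so multiplying by min_improvement > 1 demands a
    // relative gain before the epoch counts as improved.
    if (curr_logp * min_improvement <= last_logp) {
      if (halve_alpha) {
        return false;      // second stall while already halving: stop
      }
      halve_alpha = true;  // first stall: start halving the rate
    }
    if (halve_alpha) {
      learning_rate /= 2;
    }
    last_logp = curr_logp;
    return true;
  }
};

int main() {
  LRSchedule sched(0.1f, 1.003);
  // Simulated per-epoch validation log-likelihoods.
  const double logps[] = {-12000, -11000, -10500, -10480, -10475};
  for (double lp : logps) {
    bool keep_going = sched.Update(lp);
    std::cout << "logp=" << lp << "  lr=" << sched.learning_rate
              << (keep_going ? "" : "  (stop)") << std::endl;
    if (!keep_going) break;
  }
  return 0;
}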