示例#1
0
// Prints the perplexity of a set of sentences under the given language model
// to std::cout, using fixed notation with two decimals.
//
// Sentences      word-id sequences; each one is passed unchanged to
//                NHPYLM::WordSequenceLoglikelihood.
// LanguageModel  model that supplies the log-likelihoods and the word-level
//                model order.
//
// The word count of each sentence is its length minus (word order - 1);
// presumably every sentence carries that many leading context symbols which
// must not be counted as words -- confirm against the callers.
// Note: the std::fixed / precision settings remain active on std::cout after
// this function returns.
void DebugLib::PrintSentencesPerplexity(const std::vector<std::vector<int> > &Sentences, const NHPYLM &LanguageModel)
{
  const int ContextSymbolsPerSentence = LanguageModel.GetWHPYLMOrder() - 1;

  double TotalLoglikelihood = 0;
  int CountedWords = 0;
  for (auto SentenceIt = Sentences.begin(); SentenceIt != Sentences.end(); ++SentenceIt) {
    TotalLoglikelihood += LanguageModel.WordSequenceLoglikelihood(*SentenceIt);
    CountedWords += SentenceIt->size() - ContextSymbolsPerSentence;
  }

  // Perplexity = exp of the negative average log-likelihood per counted word.
  const double Perplexity = exp(-TotalLoglikelihood / CountedWords);

  std::cout << std::setprecision(2) << std::fixed;
  std::cout << " Perplexity: " << Perplexity << std::endl << std::endl;
}
// Constructs the FST wrapper around a language model.
//
// LanguageModel_  model used to initialize the LanguageModel member and to
//                 query the model orders and the start/final context ids.
// SentEndWordId_  id of the sentence-end word; the start context is looked up
//                 as (word order - 1) repetitions of this id.
// ActiveWords_    flags used to initialize the ActiveWords member
//                 (presumably indexed by word id -- confirm against usage).
//
// The FST reports itself as type "vector" with the properties
// kOEpsilons | kILabelSorted | kOLabelSorted, and uses PHI_SYMBOLID as the
// fallback symbol.
NHPYLMFst::NHPYLMFst(const NHPYLM &LanguageModel_, int SentEndWordId_, const vector< bool > &ActiveWords_) :
  LanguageModel(LanguageModel_),
  SentEndWordId(SentEndWordId_),
  CHPYLMOrder(LanguageModel_.GetCHPYLMOrder()),
  WHPYLMOrder(LanguageModel_.GetWHPYLMOrder()),
  // Query the order from the argument instead of reading the WHPYLMOrder
  // member: members are initialized in class declaration order, not in the
  // order written here, so reading the member would be undefined behaviour
  // if StartContextId happened to be declared before WHPYLMOrder.
  StartContextId(LanguageModel_.GetContextId(std::vector<int>(LanguageModel_.GetWHPYLMOrder() - 1, SentEndWordId_))),
  FinalContextId(LanguageModel_.GetFinalContextId()),
  FSTProperties(fst::kOEpsilons | fst::kILabelSorted | fst::kOLabelSorted),
  FSTType("vector"),
  ActiveWords(ActiveWords_),
  FallbackSymbolId(PHI_SYMBOLID),
  // Constructed with (final context id + 1); presumably one entry per
  // context id in [0, FinalContextId] -- confirm against the Arcs declaration.
  Arcs(LanguageModel_.GetFinalContextId() + 1)
{
}
示例#3
0
// Prints summary statistics of the language model to std::cout.
//
// For each of the two sub-models ("CHPYLM" and "WHPYLM") whose order is
// greater than zero, one section is printed containing the vectors returned
// by GetTotalCountPerLevelFor for "Context", "Table" and "Word", followed by
// the concentration and discount vectors from GetNHPYLMParameters().
// Every row is delegated to PrintVectorOfInts / PrintVectorOfDoubles.
//
// Note: the output precision is set to 2 only inside the CHPYLM section and
// stays in effect on std::cout afterwards.
void DebugLib::PrintLanguageModelStats(const NHPYLM &LanguageModel)
{
  // Third argument handed to every PrintVectorOf* call below.
  const int ColumnWidth = 8;

  // Prints one row of counts for the given sub-model and count kind.
  const auto PrintCountRow = [&](const char *Model, const char *Kind, const char *RowLabel) {
    PrintVectorOfInts(LanguageModel.GetTotalCountPerLevelFor(Model, Kind), ColumnWidth, RowLabel, "");
  };

  if (LanguageModel.GetCHPYLMOrder() > 0) {
    std::cout << std::setprecision(2);
    std::cout << " CHPYLM statistics:";
    PrintCountRow("CHPYLM", "Context", "\n  Contexts:      ");
    PrintCountRow("CHPYLM", "Table", "\n  Tables:        ");
    PrintCountRow("CHPYLM", "Word", "\n  Characters:    ");
    PrintVectorOfDoubles(LanguageModel.GetNHPYLMParameters().CHPYLMConcentration, ColumnWidth, "\n  Concentration: ", "");
    PrintVectorOfDoubles(LanguageModel.GetNHPYLMParameters().CHPYLMDiscount, ColumnWidth, "\n  Discount:      ", "");
    std::cout << "\n";
  }

  if (LanguageModel.GetWHPYLMOrder() > 0) {
    std::cout << " WHPYLM statistics:";
    PrintCountRow("WHPYLM", "Context", "\n  Contexts:      ");
    PrintCountRow("WHPYLM", "Table", "\n  Tables:        ");
    PrintCountRow("WHPYLM", "Word", "\n  Words:         ");
    PrintVectorOfDoubles(LanguageModel.GetNHPYLMParameters().WHPYLMConcentration, ColumnWidth, "\n  Concentration: ", "");
    PrintVectorOfDoubles(LanguageModel.GetNHPYLMParameters().WHPYLMDiscount, ColumnWidth, "\n  Discount:      ", "");
    std::cout << "\n";
  }

  std::cout << std::endl;
}