/*
 * Exercises Lexicon's initializer-list support:
 *   - construction from a braced list,
 *   - operator += with a braced list,
 *   - operator + with a std::initializer_list, which is expected to produce a
 *     new Lexicon and leave the left-hand operand unchanged.
 */
TIMED_TEST(LexiconTests, initializerListTest_Lexicon, TEST_TIMEOUT_DEFAULT) {
    std::initializer_list<std::string> lexlist = {"sixty", "seventy"};

    // --- construction from an initializer list ---
    Lexicon lex {"ten", "twenty", "thirty"};
    assertEqualsString("init list Lexicon", "{\"ten\", \"thirty\", \"twenty\"}", lex.toString());
    assertEqualsInt("init list Lexicon size", 3, lex.size());
    assertTrue("init list Lexicon contains ten", lex.contains("ten"));
    assertTrue("init list Lexicon contains twenty", lex.contains("twenty"));
    assertTrue("init list Lexicon contains thirty", lex.contains("thirty"));
    assertFalse("init list Lexicon contains forty", lex.contains("forty"));
    assertFalse("init list Lexicon contains fifty", lex.contains("fifty"));

    // --- operator += with a braced list modifies lex in place ---
    lex += {"forty", "fifty"};
    assertEqualsString("after += Lexicon", "{\"fifty\", \"forty\", \"ten\", \"thirty\", \"twenty\"}", lex.toString());
    assertEqualsInt("after += Lexicon size", 5, lex.size());
    assertTrue("init list Lexicon contains ten", lex.contains("ten"));
    assertTrue("init list Lexicon contains twenty", lex.contains("twenty"));
    assertTrue("init list Lexicon contains thirty", lex.contains("thirty"));
    assertTrue("init list Lexicon contains forty", lex.contains("forty"));
    assertTrue("init list Lexicon contains fifty", lex.contains("fifty"));
    assertFalse("init list Lexicon contains sixty", lex.contains("sixty"));
    assertFalse("init list Lexicon contains seventy", lex.contains("seventy"));

    // --- operator + builds a new Lexicon; lex itself must stay as it was ---
    Lexicon lex2 = (lex + lexlist);
    assertEqualsString("after + Lexicon", "{\"fifty\", \"forty\", \"ten\", \"thirty\", \"twenty\"}", lex.toString());
    assertEqualsInt("after + Lexicon size", 5, lex.size());
    assertTrue("init list Lexicon contains ten", lex.contains("ten"));
    assertTrue("init list Lexicon contains twenty", lex.contains("twenty"));
    assertTrue("init list Lexicon contains thirty", lex.contains("thirty"));
    assertTrue("init list Lexicon contains forty", lex.contains("forty"));
    assertTrue("init list Lexicon contains fifty", lex.contains("fifty"));
    assertFalse("init list Lexicon contains sixty", lex.contains("sixty"));
    assertFalse("init list Lexicon contains seventy", lex.contains("seventy"));

    // --- the result of operator + holds the union of both operands ---
    assertEqualsString("after + Lexicon 2", "{\"fifty\", \"forty\", \"seventy\", \"sixty\", \"ten\", \"thirty\", \"twenty\"}", lex2.toString());
    assertEqualsInt("after + Lexicon 2 size", 7, lex2.size());
    assertTrue("init list Lexicon contains ten", lex2.contains("ten"));
    assertTrue("init list Lexicon contains twenty", lex2.contains("twenty"));
    assertTrue("init list Lexicon contains thirty", lex2.contains("thirty"));
    assertTrue("init list Lexicon contains forty", lex2.contains("forty"));
    assertTrue("init list Lexicon contains fifty", lex2.contains("fifty"));
    assertTrue("init list Lexicon contains sixty", lex2.contains("sixty"));
    assertTrue("init list Lexicon contains seventy", lex2.contains("seventy"));
}
/**
 * Runs nbIterations rounds of lexicon re-estimation over the given sentences.
 *
 * In each round every sentence is turned into a FrameString built from the
 * current lexicon (mLex) and handed to an SPGParser. For every assignment the
 * parser returns, the assignment's probability is added to the count of each
 * (word, type) pair it contains. After all sentences are processed the counts
 * are normalized with dirichletPrior, pruned of zeros, printed, and stored as
 * the new mLex.
 *
 * Progress is written to cout. The figure printed under the "Perplexity"
 * label is the negated sum of log(totalProba) over the sentences of the round.
 *
 * @param sentences      sentences to learn from, each one a list of words
 * @param nbIterations   number of rounds; the loop body never runs if <= 0
 * @param dirichletPrior value forwarded to counts.normalize()
 * @param verbose        when true, per-sentence details are printed (see the
 *                       toggle block at the top of the loop)
 * @return mLex as it stands after the last round
 */
Lexicon<ComplexType> LexiconLearner::run(vector<list<string> > &sentences, int nbIterations, float dirichletPrior, bool verbose)
{
    SimpleType targetType("s", 0);

    for(int round = 0; round < nbIterations; ++round)
    {
        // Toggle block: with the leading "//" on the two marker lines the
        // statement in between is live, so detailed output is limited to the
        // first round. Removing those "//" turns it into a block comment.
        // /*
        if(round > 0)
            verbose = false;
        // */

        Lexicon<ComplexType> counts;
        cout << "ITERATION "<<round+1<<endl;
        float logLikelihood = 0;

        // Parsing and counting
        for(vector<list<string> >::iterator sentence = sentences.begin();
            sentence != sentences.end(); ++sentence)
        {
            FrameString fs(mLex, *sentence, targetType);
            if(verbose)
                cout << fs.toString() << endl;

            SPGParser parser(fs);
            float totalProba = parser.run();
            if(verbose)
                cout << "Probability : "<<totalProba<<endl;

            set<Assignment> assignments = parser.getAssignments();
            for(set<Assignment>::iterator current = assignments.begin();
                current != assignments.end(); ++current)
            {
                Assignment assignment = *current;
                float weight = parser.proba(assignment);
                if(verbose)
                    cout << "Assignment (proba "<<weight<<"): "<<endl;

                // Walk the words and the assigned type ids in lockstep; stop
                // as soon as either sequence is exhausted.
                list<string>::iterator word = sentence->begin();
                Assignment::iterator typeId = assignment.begin();
                while(typeId != assignment.end() && word != sentence->end())
                {
                    if(verbose)
                        cout << *word << " -> "<< fs.getType(*typeId).toString() << " (type id "<<*typeId<<")"<<endl;
                    counts[*word].addCount(fs.getType(*typeId), weight);
                    ++typeId;
                    ++word;
                }
            }

            // One dot per sentence serves as the progress indicator when quiet.
            if(verbose)
                cout << endl;
            else
                cout << "." << flush;

            logLikelihood += log(totalProba);
        }

        // Terminate the line of progress dots.
        if(!verbose)
            cout << endl;

        // Normalizing
        counts.normalize(dirichletPrior);

        // Pruning
        counts.pruneZeros();

        cout << counts.toString();
        mLex = counts;
        cout << "Perplexity (before last step): "<<-logLikelihood<<endl;
    }

    return mLex;
}