/*
 * Exercises Lexicon's std::initializer_list support in three steps:
 *   1. construction from a braced list of words,
 *   2. operator += with a braced list,
 *   3. operator + with an initializer_list, which must produce a new Lexicon
 *      and leave the left-hand operand untouched.
 * The expected toString() values list the words alphabetically.
 * Each phase uses its own assertion labels so that a failure report shows
 * which step broke.
 */
TIMED_TEST(LexiconTests, initializerListTest_Lexicon, TEST_TIMEOUT_DEFAULT) {
    std::initializer_list<std::string> lexlist = {"sixty", "seventy"};

    // 1. construction from a braced list
    Lexicon lex {"ten", "twenty", "thirty"};
    assertEqualsString("init list Lexicon", "{\"ten\", \"thirty\", \"twenty\"}", lex.toString());
    assertEqualsInt("init list Lexicon size", 3, lex.size());
    assertTrue("init list Lexicon contains ten", lex.contains("ten"));
    assertTrue("init list Lexicon contains twenty", lex.contains("twenty"));
    assertTrue("init list Lexicon contains thirty", lex.contains("thirty"));
    assertFalse("init list Lexicon contains forty", lex.contains("forty"));
    assertFalse("init list Lexicon contains fifty", lex.contains("fifty"));

    // 2. += adds the listed words to the existing lexicon
    lex += {"forty", "fifty"};
    assertEqualsString("after += Lexicon", "{\"fifty\", \"forty\", \"ten\", \"thirty\", \"twenty\"}", lex.toString());
    assertEqualsInt("after += Lexicon size", 5, lex.size());
    assertTrue("after += Lexicon contains ten", lex.contains("ten"));
    assertTrue("after += Lexicon contains twenty", lex.contains("twenty"));
    assertTrue("after += Lexicon contains thirty", lex.contains("thirty"));
    assertTrue("after += Lexicon contains forty", lex.contains("forty"));
    assertTrue("after += Lexicon contains fifty", lex.contains("fifty"));
    assertFalse("after += Lexicon contains sixty", lex.contains("sixty"));
    assertFalse("after += Lexicon contains seventy", lex.contains("seventy"));

    // 3a. + must not modify its left operand: lex still holds the same five words
    Lexicon lex2 = (lex + lexlist);
    assertEqualsString("after + Lexicon", "{\"fifty\", \"forty\", \"ten\", \"thirty\", \"twenty\"}", lex.toString());
    assertEqualsInt("after + Lexicon size", 5, lex.size());
    assertTrue("after + Lexicon contains ten", lex.contains("ten"));
    assertTrue("after + Lexicon contains twenty", lex.contains("twenty"));
    assertTrue("after + Lexicon contains thirty", lex.contains("thirty"));
    assertTrue("after + Lexicon contains forty", lex.contains("forty"));
    assertTrue("after + Lexicon contains fifty", lex.contains("fifty"));
    assertFalse("after + Lexicon contains sixty", lex.contains("sixty"));
    assertFalse("after + Lexicon contains seventy", lex.contains("seventy"));

    // 3b. the result of + holds the original five words plus the two new ones
    assertEqualsString("after + Lexicon 2", "{\"fifty\", \"forty\", \"seventy\", \"sixty\", \"ten\", \"thirty\", \"twenty\"}", lex2.toString());
    assertEqualsInt("after + Lexicon 2 size", 7, lex2.size());
    assertTrue("after + Lexicon 2 contains ten", lex2.contains("ten"));
    assertTrue("after + Lexicon 2 contains twenty", lex2.contains("twenty"));
    assertTrue("after + Lexicon 2 contains thirty", lex2.contains("thirty"));
    assertTrue("after + Lexicon 2 contains forty", lex2.contains("forty"));
    assertTrue("after + Lexicon 2 contains fifty", lex2.contains("fifty"));
    assertTrue("after + Lexicon 2 contains sixty", lex2.contains("sixty"));
    assertTrue("after + Lexicon 2 contains seventy", lex2.contains("seventy"));
}
Пример #2
0
/**
 * Runs nbIterations rounds of lexicon re-estimation over the given sentences
 * and returns the learner's lexicon (mLex) as it stands after the last round.
 *
 * In each round, every sentence is wrapped in a FrameString built from the
 * current mLex and the target type "s", then handed to an SPGParser. For each
 * assignment the parser returns, the assignment's probability is added to the
 * count of every (word, type) pair obtained by walking the sentence and the
 * assignment side by side. The counts are then normalized with
 * dirichletPrior, passed through pruneZeros(), printed, and stored as mLex.
 *
 * Progress is written to cout. The detailed trace requested by `verbose` is
 * only produced during the first round; afterwards one dot per sentence is
 * printed instead.
 */
Lexicon<ComplexType> LexiconLearner::run(vector<list<string> > &sentences, int nbIterations, float dirichletPrior, bool verbose)
{
    SimpleType targetType("s", 0);

    for(int iter = 0; iter < nbIterations; ++iter)
    {
        // Detailed output is limited to the very first round.
        const bool trace = verbose && (iter == 0);

        Lexicon<ComplexType> counts;

        cout << "ITERATION "<<iter+1<<endl;
        float logLikelihood = 0;

        // Parsing and counting
        for(vector<list<string> >::iterator sentence = sentences.begin();
                sentence != sentences.end(); ++sentence)
        {
            FrameString fs(mLex, *sentence, targetType);
            if(trace)
                cout << fs.toString() << endl;

            SPGParser parser(fs);
            float totalProba = parser.run();
            if(trace)
                cout << "Probability : "<<totalProba<<endl;

            set<Assignment> assignments = parser.getAssignments();
            set<Assignment>::iterator current = assignments.begin();
            while(current != assignments.end())
            {
                Assignment assignment = *current;
                float weight = parser.proba(assignment);
                if(trace)
                    cout << "Assignment (proba "<<weight<<"): "<<endl;

                // Walk words and type ids in lockstep; type ids left over
                // once the sentence is exhausted are ignored.
                list<string>::iterator word = sentence->begin();
                for(Assignment::iterator typ = assignment.begin();
                        typ != assignment.end() && word != sentence->end();
                        ++typ, ++word)
                {
                    if(trace)
                        cout << *word << " -> "<< fs.getType(*typ).toString() << " (type id "<<*typ<<")"<<endl;
                    counts[*word].addCount(fs.getType(*typ), weight);
                }

                ++current;
            }

            if(trace)
                cout << endl;
            else
                cout << "." << flush;

            logLikelihood += log(totalProba);
        }
        if(!trace)
            cout << endl;

        // Normalizing, then pruning
        counts.normalize(dirichletPrior);
        counts.pruneZeros();

        cout << counts.toString();

        // The freshly estimated counts become the lexicon for the next round.
        mLex = counts;

        cout << "Perplexity (before last step): "<<-logLikelihood<<endl;
    }

    return mLex;
}