StdVectorFst Phonetisaurus::makeEpsMapper( ) {
  /*
    Build a one-state mapper FST over the output symbol table.

    Every id in [0, osyms->NumSymbols()) gets a self-loop on state 0:
      - symbols found in skipSeqs are rewritten to label 0 (epsilon),
      - all other symbols are passed through unchanged.
    Composing a result with this mapper strips the unwanted symbols, but
    in tests this was 7x slower than removing them by hand through the
    FstPathFinder object.
  */
  StdVectorFst mapper;
  mapper.AddState();
  mapper.SetStart(0);

  for( size_t label=0; label < osyms->NumSymbols(); label++ ) {
    string symbol = osyms->Find( label );
    const bool skipped = ( skipSeqs.find( symbol ) != skipSeqs.end() );
    // Output side: epsilon for skipped symbols, identity otherwise.
    const size_t olabel = skipped ? 0 : label;
    mapper.AddArc( 0, StdArc( label, olabel, 0, 0 ) );
  }

  // The single state is both start and final, with final weight 0.
  mapper.SetFinal(0, 0);

  // Sort the arcs by input label.
  ILabelCompare<StdArc> byInputLabel;
  ArcSort( &mapper, byInputLabel );

  mapper.SetInputSymbols( osyms );
  mapper.SetOutputSymbols( osyms );

  return mapper;
}
StdVectorFst Phonetisaurus::entryToFSA( vector<string> entry ) {
  /*
    Transform an input spelling/pronunciation into an equivalent FSA,
    adding extra arcs as needed to accommodate clusters.

    State layout, for an entry of n tokens:
      0          start state
      0 -> 1     arc labelled with the sequence-begin symbol (sb)
      k+1 -> k+2 arc labelled with entry[k], for k in [0, n)
      n+1 -> n+2 arc labelled with the sequence-end symbol (se)
      n+2        final state
    For every occurrence of a cluster (a key of `clusters`) found in the
    entry at position p, one extra arc labelled with the cluster's id is
    added from state p+1 to state p+len(cluster)+1, bypassing the
    per-token arcs. The search resumes after each match, so occurrences
    of the same cluster never overlap.

    Returns the FSA with isyms attached as both input and output table.
  */
  StdVectorFst efst;
  efst.AddState();
  efst.SetStart(0);

  efst.AddState();
  efst.AddArc( 0, StdArc( isyms->Find( sb ), isyms->Find( sb ), 0, 1 ));

  // `i` is kept outside the loop: its final value (entry.size()) is used
  // below to place the sequence-end arc and the final state.
  size_t i=0;

  //Build the basic FSA
  for( i=0; i<entry.size(); i++ ) {
    efst.AddState();
    const StdArc::Label label = isyms->Find( entry[i] );
    efst.AddArc( i+1, StdArc( label, label, 0, i+2 ));
  }

  //Add any cluster arcs
  map<vector<string>,int>::iterator it_i;
  for( it_i=clusters.begin(); it_i!=clusters.end(); it_i++ ) {
    const vector<string>& cluster = (*it_i).first;

    // std::search reports a match at `start` for an empty pattern, which
    // would never advance; an empty cluster cannot label an arc anyway.
    if( cluster.empty() )
      continue;

    vector<string>::iterator start = entry.begin();
    while( true ) {
      // The iterator is assigned before it is ever compared.
      vector<string>::iterator it_j =
        search( start, entry.end(), cluster.begin(), cluster.end() );
      if( it_j == entry.end() )
        break;

      efst.AddArc( it_j-entry.begin()+1,
                   StdArc(
                          (*it_i).second,  //input symbol
                          (*it_i).second,  //output symbol
                          0,               //weight
                          it_j-entry.begin()+cluster.size()+1 //destination state
                          )
                   );
      // Resume searching right after the cluster that was just matched.
      start = it_j+cluster.size();
    }
  }

  efst.AddState();
  efst.AddArc( i+1, StdArc( isyms->Find( se ), isyms->Find( se ), 0, i+2));
  efst.SetFinal( i+2, 0 );

  efst.SetInputSymbols( isyms );
  efst.SetOutputSymbols( isyms );

  return efst;
}
int main(int argc, char* argv[]) { StdVectorFst fst; SymbolTable* isyms; SymbolTable* osyms; { isyms = new SymbolTable("isyms.txt"); osyms = new SymbolTable("osyms.txt"); isyms->AddSymbol("a"); isyms->AddSymbol("b"); isyms->AddSymbol("c"); isyms->Write("isyms.txt"); osyms->AddSymbol("x"); osyms->AddSymbol("y"); osyms->AddSymbol("z"); osyms->Write("osyms.txt"); } { fst.SetInputSymbols(isyms); fst.SetOutputSymbols(osyms); // Adds state 0 to the initially empty FST and make it the start state. fst.AddState(); // 1st state will be state 0 (returned by AddState) fst.SetStart(0); // arg is state ID // Adds two arcs exiting state 0. // Arc constructor args: ilabel, olabel, weight, dest state ID. fst.AddArc(0, StdArc(isyms->Find("a"), osyms->Find("x"), 0.5, 1)); // 1st arg is src state ID fst.AddArc(0, StdArc(isyms->Find("b"), osyms->Find("y"), 1.5, 1)); // Adds state 1 and its arc. fst.AddState(); fst.AddArc(1, StdArc(isyms->Find("c"), osyms->Find("z"), 2.5, 2)); // Adds state 2 and set its final weight. fst.AddState(); fst.SetFinal(2, 3.5); // 1st arg is state ID, 2nd arg weight fst.Write("example.fst"); } StdVectorFst search_fst; { search_fst.SetInputSymbols(isyms); search_fst.SetOutputSymbols(osyms); search_fst.AddState(); // 1st state will be state 0 (returned by AddState) search_fst.SetStart(0); // arg is state ID // Adds two arcs exiting state 0. // Arc constructor args: ilabel, olabel, weight, dest state ID. search_fst.AddArc(0, StdArc(isyms->Find("a"), osyms->Find("x"), 0.5, 1)); // 1st arg is src state ID // Adds state 1 and its arc. search_fst.AddState(); search_fst.AddArc(1, StdArc(isyms->Find("c"), osyms->Find("z"), 2.5, 2)); // Adds state 2 and set its final weight. 
search_fst.AddState(); search_fst.SetFinal(2, 3.5); // 1st arg is state ID, 2nd arg weight } { for (StateIterator<StdVectorFst> siter(fst); not siter.Done(); siter.Next()) { StdIntersectFst::StateId s = siter.Value(); std::cout << "state=" << s << ":"; for (ArcIterator<StdVectorFst> aiter(fst, s); not aiter.Done(); aiter.Next()) { const StdArc& arc = aiter.Value(); std::cout << arc.ilabel << "/" << arc.olabel << "->" << arc.nextstate << ","; } std::cout << std::endl; } } { Matcher<StdVectorFst> matcher(fst, MATCH_INPUT); matcher.SetState(0); StdArc::Label find_label = 1; if (matcher.Find(find_label)) { for (; not matcher.Done(); matcher.Next()) { const StdArc& arc = matcher.Value(); std::cout << "found=" << arc.ilabel << "/" << arc.olabel << "->" << arc.nextstate << std::endl; } } } // intersect contains strings in both A and B { ArcSort(&fst, StdOLabelCompare()); ArcSort(&search_fst, StdILabelCompare()); /* ComposeFilter compose_filter; if (!GetComposeFilter("auto", &compose_filter)) { LOG(ERROR) << "failed"; exit(1); } const fst::IntersectFstOptions<StdArc> opts; */ //StdIntersectFst ofst(fst, search_fst, opts); StdIntersectFst ofst(fst, search_fst); } }