M2MFstAligner::M2MFstAligner(string _model_file) {
  /*
    Initialize the aligner from a previously trained model stored in
    _model_file (read as a VectorFst<LogArc>).

    Every arc of the model contributes an (input label, weight) pair to
    alignment_model.  The model's input symbol table must carry the
    bookkeeping info at fixed ids:
      0: eps
      1: skip
      2: "<seq1_sep>_<seq2_sep>"
      3: s1s2_sep
      4: "<seq1_del>_<seq2_del>_<seq1_max>_<seq2_max>"

    An unreadable model, a model without input symbols, or malformed
    bookkeeping symbols are reported on stderr and terminate the program
    instead of dereferencing a null pointer or indexing out of range.
  */
  VectorFst<LogArc>* model = VectorFst<LogArc>::Read(_model_file);
  if (!model) {
    std::cerr << "M2MFstAligner: unable to read model file '"
              << _model_file << "'" << std::endl;
    exit(1);
  }

  // Collect the weight attached to each arc's input label.
  for (StateIterator<VectorFst<LogArc> > siter(*model); !siter.Done();
       siter.Next()) {
    LogArc::StateId q = siter.Value();
    for (ArcIterator<VectorFst<LogArc> > aiter(*model, q); !aiter.Done();
         aiter.Next()) {
      const LogArc& arc = aiter.Value();
      alignment_model.insert(
          pair<LogArc::Label, LogWeight>(arc.ilabel, arc.weight));
    }
  }

  if (!model->InputSymbols()) {
    std::cerr << "M2MFstAligner: model '" << _model_file
              << "' has no input symbol table" << std::endl;
    delete model;
    exit(1);
  }
  // Keep a private copy of the symbol table so the FST itself can be
  // released; previously the whole model was leaked just to keep the
  // table alive.
  // NOTE(review): confirm the destructor releases isyms.
  isyms = model->InputSymbols()->Copy();
  delete model;

  // A named int is used for id 0: a literal 0 is presumably ambiguous
  // between the integer-key and the char-pointer overloads of Find().
  int i = 0;
  eps = isyms->Find(i);
  skip = isyms->Find(1);

  // Symbol 2 packs both sequence separators, joined by "_".
  vector<string> seps = split(isyms->Find(2), "_");
  if (seps.size() < 2) {
    std::cerr << "M2MFstAligner: model '" << _model_file
              << "' has a malformed separator symbol (id 2)" << std::endl;
    exit(1);
  }
  seq1_sep = seps[0];
  seq2_sep = seps[1];
  s1s2_sep = isyms->Find(3);

  // Symbol 4 packs the deletion flags and the maximum subsequence lengths.
  vector<string> params = split(isyms->Find(4), "_");
  if (params.size() < 4) {
    std::cerr << "M2MFstAligner: model '" << _model_file
              << "' has a malformed parameter symbol (id 4)" << std::endl;
    exit(1);
  }
  seq1_del = (params[0] == "true");
  seq2_del = (params[1] == "true");
  seq1_max = atoi(params[2].c_str());
  seq2_max = atoi(params[3].c_str());
}
M2MFstAligner::M2MFstAligner( string _model_file, bool _penalize, bool _penalize_em, bool _restrict ){
  /*
    Initialize the aligner with a previously trained model.

    The model requires that the first several symbols in the
    symbols table contain the separator and other bookkeeping info:
      0: eps
      1: skip
      2: "<seq1_sep>_<seq2_sep>"
      3: s1s2_sep
      4: "<seq1_del>_<seq2_del>_<seq1_max>_<seq2_max>"

    _penalize, _penalize_em and _restrict are stored unchanged in the
    members of the same name (without the leading underscore).

    An unreadable model, a model without input symbols, or malformed
    bookkeeping symbols are reported on stderr and terminate the program
    instead of dereferencing a null pointer or indexing out of range.
  */
  restrict    = _restrict;
  penalize    = _penalize;
  penalize_em = _penalize_em;
  // presumably a dense-hash style container that needs its empty key
  // declared before first use -- verify against the member's type
  penalties.set_empty_key(0);

  VectorFst<LogArc>* model = VectorFst<LogArc>::Read( _model_file );
  if( !model ){
    std::cerr << "M2MFstAligner: unable to read model file '"
              << _model_file << "'" << std::endl;
    exit(1);
  }

  // Collect the weight attached to each arc's input label.
  for( StateIterator<VectorFst<LogArc> > siter(*model); !siter.Done(); siter.Next() ){
    LogArc::StateId q = siter.Value();
    for( ArcIterator<VectorFst<LogArc> > aiter(*model, q); !aiter.Done(); aiter.Next() ){
      const LogArc& arc = aiter.Value();
      alignment_model.insert( pair<LogArc::Label,LogWeight>(arc.ilabel,arc.weight) );
    }
  }

  if( !model->InputSymbols() ){
    std::cerr << "M2MFstAligner: model '" << _model_file
              << "' has no input symbol table" << std::endl;
    delete model;
    exit(1);
  }
  // Keep a private copy of the symbol table so the FST itself can be
  // released; previously the whole model was leaked just to keep the
  // table alive.
  // NOTE(review): confirm the destructor releases isyms.
  isyms = model->InputSymbols()->Copy();
  delete model;

  // A named int is used for id 0: a literal 0 is presumably ambiguous
  // between the integer-key and the char-pointer overloads of Find().
  int i = 0;
  eps  = isyms->Find(i);
  skip = isyms->Find(1);

  string tie = "_"; //tie to pack parameters

  // Symbol 2 packs both sequence separators, joined by the tie.
  string sseps = isyms->Find(2);
  vector<string> seps = tokenize_utf8_string( &sseps, &tie );
  if( seps.size() < 2 ){
    std::cerr << "M2MFstAligner: model '" << _model_file
              << "' has a malformed separator symbol (id 2)" << std::endl;
    exit(1);
  }
  seq1_sep = seps[0];
  seq2_sep = seps[1];
  s1s2_sep = isyms->Find(3);

  // Symbol 4 packs the deletion flags and the maximum subsequence lengths.
  string sparams = isyms->Find(4);
  vector<string> params = tokenize_utf8_string( &sparams, &tie );
  if( params.size() < 4 ){
    std::cerr << "M2MFstAligner: model '" << _model_file
              << "' has a malformed parameter symbol (id 4)" << std::endl;
    exit(1);
  }
  seq1_del = ( params[0] == "true" );
  seq2_del = ( params[1] == "true" );
  seq1_max = atoi(params[2].c_str());
  seq2_max = atoi(params[3].c_str());
}