예제 #1
0
M2MFstAligner::M2MFstAligner(string _model_file)
{
    VectorFst<LogArc> *model = VectorFst<LogArc>::Read(_model_file);
    for (StateIterator<VectorFst<LogArc> > siter(*model);
            !siter.Done(); siter.Next()) {
        LogArc::StateId q = siter.Value();
        for (ArcIterator<VectorFst<LogArc> > aiter(*model, q);
                !aiter.Done(); aiter.Next()) {
            const LogArc & arc = aiter.Value();
            alignment_model.insert(pair<LogArc::Label,LogWeight> (arc.ilabel, arc.weight));
        }
    }
    isyms = (SymbolTable *) model->InputSymbols();
    int i = 0;
    eps = isyms->Find(i);       //Can't write '0' here for some reason...
    skip = isyms->Find(1);
    vector<string> seps = split(isyms->Find(2), "_");
    seq1_sep = seps[0];
    seq2_sep = seps[1];
    s1s2_sep = isyms->Find(3);
    vector<string> params = split(isyms->Find(4), "_");
    seq1_del = params[0].compare("true") ? false : true;
    seq2_del = params[1].compare("true") ? false : true;
    seq1_max = atoi(params[2].c_str());
    seq2_max = atoi(params[3].c_str());

}
예제 #2
0
M2MFstAligner::M2MFstAligner( string _model_file, bool _penalize, bool _penalize_em, bool _restrict  ){
  /*
    Initialize the aligner with a previously trained model.
    The model requires that the first several symbols in the 
    symbols table contain the separator and other bookkeeping info.
  */

  restrict    = _restrict;
  penalize    = _penalize;
  penalize_em = _penalize_em;
  penalties.set_empty_key(0);
  VectorFst<LogArc>* model = VectorFst<LogArc>::Read( _model_file );
  for( StateIterator<VectorFst<LogArc> > siter(*model); !siter.Done(); siter.Next() ){
    LogArc::StateId q = siter.Value();
    for( ArcIterator<VectorFst<LogArc> > aiter(*model, q); !aiter.Done(); aiter.Next() ){
      const LogArc& arc = aiter.Value();
      alignment_model.insert( pair<LogArc::Label,LogWeight>(arc.ilabel,arc.weight) );
    }
  }      
  isyms = (SymbolTable*)model->InputSymbols();
  int i = 0;
  eps      = isyms->Find(i);//Can't write '0' here for some reason...
  skip     = isyms->Find(1);
  string tie = "_"; //tie to pack parameters

  string sseps = isyms->Find(2);
  vector<string> seps = tokenize_utf8_string( &sseps, &tie );
  seq1_sep = seps[0];
  seq2_sep = seps[1];
  s1s2_sep = isyms->Find(3);

  string sparams = isyms->Find(4);
  vector<string> params = tokenize_utf8_string( &sparams, &tie );
  seq1_del = params[0].compare("true") ? false : true;
  seq2_del = params[1].compare("true") ? false : true;
  seq1_max = atoi(params[2].c_str());
  seq2_max = atoi(params[3].c_str());

}