Example #1
0
void PhonetisaurusE2F::_make_loop_and_iomap( const EncodeTable<StdArc>& table ){
  loop = new VectorFst<StdArc>();
  loop->AddState();
  loop->SetStart(0);
  
  if( verbose==true ){
    for( size_t i=1; i<=table.Size(); i++ ){
      const EncodeTable<StdArc>::Tuple *t = table.Decode(i);
      cout << "i=" << i << " in: " << isyms->Find(t->ilabel) << " out: " << osyms->Find(t->olabel) << endl;
    }
  }

  for( size_t i=2; i<=table.Size(); i++ ){
    const EncodeTable<StdArc>::Tuple *t = table.Decode(i);

    if( i2omap->find(t->ilabel)==i2omap->end() ){
      vector<size_t> m;
      m.push_back(t->olabel);
      i2omap->insert(pair<size_t, vector<size_t> >(t->ilabel, m));
      loop->AddArc( 0, StdArc( t->ilabel, t->olabel, StdArc::Weight::One(), 0 ) );
    }else{
      (*i2omap)[t->ilabel].push_back(t->olabel);
      loop->AddArc( 0, StdArc( t->ilabel, t->olabel, StdArc::Weight::One(), 0 ) );
    }
  }
  loop->SetFinal(0, StdArc::Weight::One());
  
  ArcSort(loop, ILabelCompare<StdArc>());
  return;
}
Example #2
0
//STEP 2: Create a filter, which adds multi-token links and skip support
void PhonetisaurusE2F::_make_ifilter( ){
  /*
    Create a filter FST.  This will map arcs in the linear 
    input FSA to longer clusters wherever appropriate. A more
    advanced version can be used to also place restrictions on
    how many phoneme insertions to allow, or how to penalize them.
  */
  ifilter.AddState();
  ifilter.SetStart(0);
  for( size_t j=2; j<isyms->NumSymbols(); j++ ){
    ifilter.AddArc( 0, StdArc( j, j, StdArc::Weight::One(), 0 ) );
  }

  typedef map<vector<string>, size_t>::iterator cl_iter;
  size_t k = 1;
  for( cl_iter it=iclusters->begin(); it != iclusters->end(); it++){
    ifilter.AddState();
    ifilter.AddArc( 0, StdArc( isyms->Find(it->first.at(0)), it->second, StdArc::Weight::One(), k ) );
    ifilter.AddArc( k, StdArc( isyms->Find(it->first.at(1)), 0, StdArc::Weight::One(), 0 ) );
    k++;
  }
  ifilter.SetFinal( 0, StdArc::Weight::One() );

  return;
}
Example #3
0
//STEP 1: Create a linear FSA with skip loops
void PhonetisaurusE2F::_entry_to_skip_fsa( vector<string>* tokens ){
  /*
    Rebuild 'word' as a linear chain with one arc per token:
        state p --( tok:tok, weight One )--> state p+1
    where tok is the id isyms->Find() returns for tokens[p].  The last
    state (index tokens->size()) is final.  When 'allow_ins' is set,
    every state, including the final one, also gets a 2:2 self-loop
    (the original comment describes these as phoneme insertions).
    The result is arc-sorted on output labels.
  */
  word = VectorFst<StdArc>();
  word.AddState();
  word.SetStart(0);

  const size_t num_tokens = tokens->size();
  for( size_t pos=0; pos<num_tokens; ++pos ){
    word.AddState();
    //One lookup serves as both the input and the output label
    const StdArc::Label label = isyms->Find( tokens->at(pos) );
    word.AddArc( pos, StdArc( label, label, StdArc::Weight::One(), pos+1 ) );
    //If phoneme insertions are to be allowed
    if( allow_ins==true )
      word.AddArc( pos, StdArc( 2, 2, StdArc::Weight::One(), pos ) );
  }

  //The final state gets the same self-loop as the others
  if( allow_ins==true )
    word.AddArc( num_tokens, StdArc( 2, 2, StdArc::Weight::One(), num_tokens ) );
  word.SetFinal( num_tokens, StdArc::Weight::One() );

  ArcSort(&word,OLabelCompare<StdArc>());
}
Example #4
0
void PhonetisaurusE2F::entry_to_fst_m( vector<string>* tokens ){
  /*
    Convert an input word into an equivalent FST.  In this case the
    entire process is achieved via a 'mechanical' algorithm rather than
    a series of atomic WFST-based operations.

    tokens: the tokenised input word.  State p of 'word' corresponds to
            "p tokens consumed"; state tokens->size() is made final.

    For each token an arc p -> p+1 is added for every output label that
    i2omap lists for the token's input label.  When 'allow_ins' is set,
    every state (including the last) also gets one self-loop per output
    label listed under input label 2.  Finally, for every occurrence of
    a cluster from 'iclusters' in the token sequence, arcs spanning the
    whole cluster are added.

    Lookups use i2omap's operator[], so labels that are not yet present
    get an empty entry inserted as a side effect.

    NOTE(review): 'word' is not reset here (unlike _entry_to_skip_fsa);
    presumably the caller provides an empty FST -- confirm.
  */

  word.AddState();
  word.SetStart(0);

  //Build the basic FST
  size_t i=0;
  for( i=0; i<tokens->size(); i++){
    word.AddState();
    size_t il = isyms->Find(tokens->at(i));
    const vector<size_t>& outs = (*i2omap)[il];
    for( size_t j=0; j<outs.size(); j++ )
      word.AddArc( i, StdArc( il, outs[j], StdArc::Weight::One(), i+1 ));
    if( allow_ins==true ){
      const vector<size_t>& ins = (*i2omap)[2];
      for( size_t j=0; j<ins.size(); j++ )
        word.AddArc( i, StdArc( 2, ins[j], StdArc::Weight::One(), i ) );
    }
  }
  //Insertion self-loops on the last state as well
  if( allow_ins==true ){
    const vector<size_t>& ins = (*i2omap)[2];
    for( size_t j=0; j<ins.size(); j++ )
      word.AddArc( i, StdArc( 2, ins[j], StdArc::Weight::One(), i ) );
  }

  //Add any cluster arcs
  map<vector<string>,size_t>::iterator it_i;
  for( it_i=iclusters->begin(); it_i!=iclusters->end(); it_i++ ){
    const vector<string>& cluster = (*it_i).first;
    const size_t cluster_label    = (*it_i).second;

    //An empty pattern matches at 'start' without ever advancing it,
    //which would spin forever below; there is nothing to add for it.
    if( cluster.empty() )
      continue;

    //Scan for non-overlapping occurrences.  The match iterator is
    //assigned by search() before it is ever compared, so no
    //uninitialised iterator is read.
    vector<string>::iterator start = tokens->begin();
    for(;;){
      vector<string>::iterator hit =
        search( start, tokens->end(), cluster.begin(), cluster.end() );
      if( hit == tokens->end() )
        break;

      const size_t from = hit - tokens->begin();
      const vector<size_t>& outs = (*i2omap)[cluster_label];
      for( size_t j=0; j<outs.size(); j++ )
        word.AddArc( from, StdArc(
                                  cluster_label,            //input symbol
                                  outs[j],                  //output symbol
                                  StdArc::Weight::One(),    //weight (was the literal 0; One() is 0 for StdArc's tropical weight)
                                  from + cluster.size()     //destination state
                                  ) );
      //Resume after the matched span
      start = hit + cluster.size();
    }
  }

  word.SetFinal( i, StdArc::Weight::One() );

  return;
}
Example #5
0
 // Map a LexStdArc onto a StdArc with the same labels and next state.
 // The weight is selected by i_:
 //   0 -> Times(Value1, Value2) of the arc's weight
 //   1 -> Value1
 //   2 -> Value2
 // For any other i_ the weight is left as a default-constructed W.
 StdArc operator() (const LexStdArc& arc) const {
   W mapped;
   switch (i_) {
     case 0:
       mapped = Times ( arc.weight.Value1(), arc.weight.Value2() );
       break;
     case 1:
       mapped = arc.weight.Value1();
       break;
     case 2:
       mapped = arc.weight.Value2();
       break;
     default:
       break;
   }
   return StdArc (arc.ilabel, arc.olabel, mapped, arc.nextstate);
 }
Example #6
0
void Arpa2OpenFST::make_arc( string istate, string ostate, string isym, string osym, double weight ){
  /*
    Add one arc istate -> ostate to 'arpafst'.

    State names are resolved through 'ssyms'; a name that Find() reports
    as -1 gets a fresh state, which is registered under that name.
    'weight' is passed through log10_2tropical() before it is attached
    to the StdArc.

    Labels: 'isym' is split with tokenize_utf8_string() on 'delim'.  If
    that yields exactly two pieces they become the input and output
    symbols (a first piece equal to 'null_sep' is replaced by 'eps') and
    'osym' is ignored.  Otherwise 'isym' and 'osym' are used as given.
  */
  int src = ssyms->Find(istate);
  if( src == -1 ){
    src = arpafst.AddState();
    ssyms->AddSymbol( istate, src );
  }
  //Looked up only after the source is registered, so istate==ostate
  //resolves to the same state.
  int dst = ssyms->Find(ostate);
  if( dst == -1 ){
    dst = arpafst.AddState();
    ssyms->AddSymbol( ostate, dst );
  }
  weight = log10_2tropical(weight);

  vector<string> pieces = tokenize_utf8_string( &isym, &delim );
  const bool joint = ( pieces.size()==2 );
  if( joint && pieces[0].compare(null_sep)==0 )
    pieces[0] = eps;

  const string& in_text  = joint ? pieces[0] : isym;
  const string& out_text = joint ? pieces[1] : osym;

  const StdArc::Label in_id  = isyms->AddSymbol(in_text);
  const StdArc::Label out_id = osyms->AddSymbol(out_text);
  arpafst.AddArc( src, StdArc( in_id, out_id, weight, dst ) );
}
Example #7
0
void ARPA2WFST::_make_arc( string istate, string ostate, string isym, double weight ){
  /*
    Add one arc istate -> ostate to 'arpafst'.

    istate, ostate: state names, resolved through 'ssyms'.  A name that
                    Find() reports as -1 gets a fresh state, which is
                    registered under that name.
    isym:           symbol added to 'isyms'; its id is used as both the
                    input and the output label.
    weight:         passed through log10_2tropical() before it is
                    attached to the StdArc.
  */
  int is_id = ssyms->Find(istate);
  if( is_id == -1 ){
    is_id = arpafst.AddState();
    ssyms->AddSymbol( istate, is_id );
  }
  //Resolve the destination only after the source has been registered.
  //Looking both up first meant that a not-yet-seen name used as both
  //istate and ostate allocated two states and pointed the arc at the
  //second one instead of forming a self-loop.
  int os_id = ssyms->Find(ostate);
  if( os_id == -1 ){
    os_id = arpafst.AddState();
    ssyms->AddSymbol( ostate, os_id );
  }
  weight = log10_2tropical(weight);
  int sid = isyms->AddSymbol(isym);

  arpafst.AddArc( is_id, StdArc( sid, sid, weight, os_id) );

  return;
}