void M2MFstAligner::write_lattice(string lattice) {
  //Write out the entire training set in lattice format.
  //Perform the union first.  This output can then
  // be plugged directly in to a counter to obtain expected
  // alignment counts for the EM-trained corpus.  Yields
  // far higher-quality joint n-gram models, which are also
  // more robust for smaller training corpora.
  //Make sure you call this BEFORE any call to
  // write_all_alignments,
  // as the latter function will override some of the weights.

  //Chaining the standard Union operation, including using a
  // rational FST, still performs very poorly in the log semiring.
  //Presumably it's running push or something at each step.  It
  // should be fine to do that just once at the end.
  //Rolling our own union turns out to be MUCH faster.
  VectorFst<LogArc> ufst;
  ufst.AddState();
  ufst.SetStart(0);
  int total_states = 0;

  for (int i = 0; i < fsas.size(); i++) {
    TopSort(&fsas[i]);
    for (StateIterator<VectorFst<LogArc> > siter(fsas[i]);
         !siter.Done(); siter.Next()) {
      LogArc::StateId q = siter.Value();
      LogArc::StateId r;
      if (q == 0)
        r = 0;
      else
        r = ufst.AddState();
      for (ArcIterator<VectorFst<LogArc> > aiter(fsas[i], q);
           !aiter.Done(); aiter.Next()) {
        const LogArc& arc = aiter.Value();
        ufst.AddArc(r, LogArc(arc.ilabel, arc.ilabel, arc.weight,
                              arc.nextstate + total_states));
      }
      if (fsas[i].Final(q) != LogWeight::Zero())
        ufst.SetFinal(r, LogWeight::One());
    }
    total_states += fsas[i].NumStates() - 1;
  }

  //Normalize weights
  Push(&ufst, REWEIGHT_TO_INITIAL);
  //Write the resulting lattice to disk
  ufst.Write(lattice);
  //Write the syms table too.
  isyms->WriteText("lattice.syms");
  return;
}
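/*
 * Hedged sketch (not part of the original source): the comment above says the
 * union lattice can be plugged directly into a counter to obtain expected
 * alignment counts.  One way to do that with plain OpenFst is a
 * forward/backward pass over the lattice written by write_lattice().  The
 * function name and the file names "lattice.fst" / "lattice.syms" are assumed
 * examples; only "lattice.syms" is fixed by the code above.
 */
void PrintExpectedAlignmentCounts() {
  std::unique_ptr<VectorFst<LogArc> > lat(VectorFst<LogArc>::Read("lattice.fst"));
  std::unique_ptr<SymbolTable> syms(SymbolTable::ReadText("lattice.syms"));
  std::vector<LogWeight> alpha, beta;
  ShortestDistance(*lat, &alpha);         //forward scores
  ShortestDistance(*lat, &beta, true);    //backward scores
  std::map<int, double> counts;           //expected count per joint label
  for (StateIterator<VectorFst<LogArc> > siter(*lat); !siter.Done(); siter.Next()) {
    LogArc::StateId s = siter.Value();
    if ((size_t)s >= alpha.size()) continue;
    for (ArcIterator<VectorFst<LogArc> > aiter(*lat, s); !aiter.Done(); aiter.Next()) {
      const LogArc& arc = aiter.Value();
      if ((size_t)arc.nextstate >= beta.size()) continue;
      //arc posterior in -log space: alpha[s] + w + beta[nextstate] - total
      double gamma = alpha[s].Value() + arc.weight.Value()
                     + beta[arc.nextstate].Value() - beta[lat->Start()].Value();
      counts[arc.ilabel] += exp(-gamma);
    }
  }
  for (std::map<int, double>::iterator it = counts.begin(); it != counts.end(); ++it)
    std::cout << syms->Find(it->first) << "\t" << it->second << std::endl;
}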
void M2MFstAligner::write_model(string _model_file) {
  VectorFst<LogArc> model;
  model.AddState();
  model.SetStart(0);
  model.SetFinal(0, LogWeight::One());
  map<LogArc::Label, LogWeight>::iterator it;
  for (it = alignment_model.begin(); it != alignment_model.end(); it++)
    model.AddArc(0, LogArc((*it).first, (*it).first, (*it).second, 0));
  model.SetInputSymbols(isyms);
  model.Write(_model_file);
  return;
}
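/*
 * Hedged sketch (not part of the original source): write_model() stores the
 * alignment model as a single-state FST with one arc per joint label.  A
 * matching loader, reading it back into a label -> weight map, could look like
 * the helper below; the function name is an assumption, not part of the
 * aligner's API, and std names are assumed to be in scope as in the file above.
 */
void LoadAlignmentModel(const string& model_file,
                        map<LogArc::Label, LogWeight>* weights) {
  std::unique_ptr<VectorFst<LogArc> > model(VectorFst<LogArc>::Read(model_file));
  for (ArcIterator<VectorFst<LogArc> > aiter(*model, model->Start());
       !aiter.Done(); aiter.Next()) {
    const LogArc& arc = aiter.Value();
    //each arc carries one joint "g}p" label and its -log probability
    (*weights)[arc.ilabel] = arc.weight;
  }
}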
/**
 * \brief Adds a state.
 * \return a pair of the composed state id and a flag that is true if the
 * requested state has already been visited, false otherwise.
 */
inline pair<StateId, bool> add ( typename KenLMModelT::State& m2nextstate,
                                 StateId m1nextstate,
                                 Weight m1stateweight ) {
  static StateId lm = 0;
  getIdx ( m2nextstate );
  ///New history:
  if ( seenlmstates_.find ( history ) == seenlmstates_.end() ) {
    seenlmstates_[history] = ++lm;
  }
  uint64_t compound = m1nextstate * sid + seenlmstates_[history];
  LDEBUG ( "compound id=" << compound );
  if ( stateexistence_.find ( compound ) == stateexistence_.end() ) {
    LDEBUG ( "New State!" );
    statemap_[composed_->NumStates()] =
        pair<StateId, const typename KenLMModelT::State > ( m1nextstate, m2nextstate );
    composed_->AddState();
    if ( m1stateweight != mw_ ( ZPosInfinity() ) )
      composed_->SetFinal ( composed_->NumStates() - 1, m1stateweight );
    stateexistence_[compound] = composed_->NumStates() - 1;
    return pair<StateId, bool> ( composed_->NumStates() - 1, false );
  }
  return pair<StateId, bool> ( stateexistence_[compound], true );
};
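/*
 * Hedged sketch (not part of the original source): the compound key above
 * packs the pair (first-machine state, LM-history id) into one integer,
 * which stays unique as long as the history id is smaller than the multiplier
 * sid.  The helper names below are hypothetical illustrations of that packing.
 */
inline uint64_t PackStatePair(uint64_t m1state, uint64_t lmhistoryid, uint64_t sid) {
  return m1state * sid + lmhistoryid;                //unique while lmhistoryid < sid
}
inline std::pair<uint64_t, uint64_t> UnpackStatePair(uint64_t compound, uint64_t sid) {
  return std::pair<uint64_t, uint64_t>(compound / sid, compound % sid);
}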
void M2MFstAligner::_conditional_max( bool y_given_x ){
  /*
    Compute the conditional distribution, P(Y|X), using the WFST paradigm.
    This is based on the approach from Shu and Hetherington 2002.
    It is assumed that all WFSTs and operations use the Log semiring.

    Given:
           FST1 = P(X,Y)
    Compute:
           FST2 = P(X)    = Map_inv(Det(RmEps(Proj_i(FST1))))
           FST3 = P(Y|X)  = Compose(FST2,FST1)

    Proj_i:  project on input labels
    RmEps:   epsilon removal
    Det:     determinize
    Map_inv: invert weights

    Notes: An analogous process may be used to compute P(X|Y).  In this case
      one would project on OUTPUT labels - Proj_o, and reverse the composition
      order to Compose(FST1,FST2).

    Future work: What we are doing here in terms of *generating* the JOINT fst
      each time is really just a dumb hack.  We *should* encode the model in an
      FST and encode the individual lattices, rather than doing the hacky
      manual label encoding that we currently rely on.
  */

  //Joint distribution that we start with
  VectorFst<LogArc>* joint = new VectorFst<LogArc>();
  SymbolTable* misyms = new SymbolTable("misyms");
  SymbolTable* mosyms = new SymbolTable("mosyms");
  joint->AddState();
  joint->AddState();
  joint->SetStart(0);
  joint->SetFinal(1, LogArc::Weight::One());

  map<LogArc::Label,LogWeight>::iterator it;
  for( it=prev_alignment_model.begin(); it != prev_alignment_model.end(); it++ ){
    string isym = isyms->Find((*it).first);
    vector<string> io = tokenize_utf8_string( &isym, &s1s2_sep );
    LogArc arc( misyms->AddSymbol(io[0]), mosyms->AddSymbol(io[1]), (*it).second, 1 );
    joint->AddArc( 0, arc );
  }
  //VectorFst<LogArc>* joint = new VectorFst<LogArc>();
  //Push<LogArc,REWEIGHT_TO_FINAL>(*_joint, joint, kPushWeights);
  //joint->SetFinal(1,LogWeight::One());
  joint->Write("m2mjoint.fst");

  //BEGIN COMPUTE MARGINAL P(X)
  VectorFst<LogArc>* dmarg;
  if( y_given_x )
    dmarg = new VectorFst<LogArc>(ProjectFst<LogArc>(*joint, PROJECT_INPUT));
  else
    dmarg = new VectorFst<LogArc>(ProjectFst<LogArc>(*joint, PROJECT_OUTPUT));
  RmEpsilon(dmarg);
  VectorFst<LogArc>* marg = new VectorFst<LogArc>();
  Determinize(*dmarg, marg);
  ArcMap(marg, InvertWeightMapper<LogArc>());
  if( y_given_x )
    ArcSort(marg, OLabelCompare<LogArc>());
  else
    ArcSort(marg, ILabelCompare<LogArc>());
  //END COMPUTE MARGINAL P(X)
  marg->Write("marg.fst");

  //CONDITIONAL P(Y|X)
  VectorFst<LogArc>* cond = new VectorFst<LogArc>();
  if( y_given_x )
    Compose(*marg, *joint, cond);
  else
    Compose(*joint, *marg, cond);
  //cond now contains the conditional distribution P(Y|X)
  cond->Write("cond.fst");

  //Now update the model with the new values
  for( MutableArcIterator<VectorFst<LogArc> > aiter(cond, 0);
       !aiter.Done(); aiter.Next() ){
    LogArc arc = aiter.Value();
    string lab = misyms->Find(arc.ilabel)+"}"+mosyms->Find(arc.olabel);
    int labi = isyms->Find(lab);
    alignment_model[labi]      = arc.weight;
    prev_alignment_model[labi] = LogWeight::Zero();
  }

  //'delete a, b, c;' only frees the first pointer (comma operator),
  // so free each one explicitly.
  delete joint;
  delete marg;
  delete cond;
  delete dmarg;
  delete misyms;
  delete mosyms;
  return;
}
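/*
 * Hedged sketch (not part of the original source): the same
 * Map_inv(Det(RmEps(Proj_i(FST1)))) / Compose(FST2,FST1) recipe described in
 * the comment above, written as a standalone helper over an arbitrary joint
 * FST in the log semiring.  Only the P(Y|X) direction is shown; the function
 * name and value-return style are assumptions, not part of the aligner's API.
 */
VectorFst<LogArc> ConditionalFromJoint(const VectorFst<LogArc>& joint) {
  //FST2 = P(X) = Map_inv(Det(RmEps(Proj_i(FST1))))
  VectorFst<LogArc> proj(ProjectFst<LogArc>(joint, PROJECT_INPUT));
  RmEpsilon(&proj);
  VectorFst<LogArc> marg;
  Determinize(proj, &marg);
  ArcMap(&marg, InvertWeightMapper<LogArc>());
  ArcSort(&marg, OLabelCompare<LogArc>());
  //FST3 = P(Y|X) = Compose(FST2, FST1)
  VectorFst<LogArc> cond;
  Compose(marg, joint, &cond);
  return cond;
}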
/**
 * Create an FST based on an RNN
 */
void FlatBOFstBuilder::convertRNN(CRnnLM & rnnlm, VectorFst<LogArc> &fst) {
    queue<NeuronFstHistory> q;
    VectorFst<LogArc> new_fst;

    NeuronFstHistory fsth(rnnlm.getHiddenLayerSize(),getNumBins());
    FstIndex id = 0;

    NeuronFstHistory new_fsth(rnnlm.getHiddenLayerSize(),getNumBins());
    FstIndex new_id;

    NeuronFstHistory min_backoff(rnnlm.getHiddenLayerSize(),getNumBins());
    set<NeuronFstHistory> set_min_backoff;

    NeuronFstHistory bo_fsth(rnnlm.getHiddenLayerSize(),getNumBins());
    bool backoff = false;
    vector<FstIndex> deleted;

    real p = 0.00;
    real p_joint = 0.00;
    real entropy = 0.0;
    real delta = 0.0;
    vector<real> all_prob(rnnlm.getVocabSize());
    vector<real> posterior(10);

    map< FstIndex,set<FstIndex> > pred;
    vector<bool> non_bo_pred(rnnlm.getVocabSize());
    vector<int> to_be_added;
    vector<int> to_be_removed;
    for (int i = 0; i < rnnlm.getVocabSize(); i++) {
        to_be_removed.push_back(i);
    }
    vector<real> to_be_added_prob;

    FstIndex n_added = 0;
    FstIndex n_processed = 0;
    FstIndex next_n_added = 0;
    FstIndex next_n_processed = 0;
    FstIndex n_backoff = 0;
    FstIndex n_only_backoff = 0;

    int v = rnnlm.getVocabSize();
    int w = 0;

    // Initialize
    rnnlm.copyHiddenLayerToInput();
//    printNeurons(rnnlm.getInputLayer(),0,10);

    // Initial state ( 0 | hidden layer after </s>)
    printNeurons(rnnlm.getHiddenLayer(),0,10);
    fsth.setFstHistory(rnnlm, *dzer);
    fsth.setLastWord(0);
    q.push(fsth);
    addFstState(id, new NeuronFstHistory(fsth), fst);
    fst.SetStart(INIT_STATE);

    // Final state (don't care about the associated discrete representation)
    fst.AddState();
    fst.SetFinal(FINAL_STATE, LogWeight::One());

    /*posterior.at(INIT_STATE) = MY_LOG_ONE;*/

    min_backoff.setLastWord(-1);
    computeEntropyAndConditionals(entropy, all_prob, rnnlm, min_backoff);
    min_backoff = getBackoff(rnnlm, min_backoff, set_min_backoff, all_prob, to_be_removed);
    cout << "MIN BACKOFF " << min_backoff.toString() << endl;
    set_min_backoff.insert(min_backoff);

//    addFstState(id, min_backoff, fst);
//    q.push(min_backoff);

    // Estimate the number of backoff loops to bound the backoff path length
//    float ratioa = 0.0;
//    float ratiob = 0.0;
    float ratio = 0.0;
//    for (int i=0; i < min_backoff.getNumDims(); i++) {
//        if (min_backoff.getDim(i) == 1) { ratioa++; }
//        if (fsth.getDim(i) == 1) { ratiob++; }
//    }
//    ratioa /= min_backoff.getNumDims();
//    ratiob /= min_backoff.getNumDims();
//    ratio = (ratioa*(1.0-ratiob))+(ratiob*(1.0-ratioa));
    ratio=1.0;

//    printf("ratio=%f\t%i BO loops\n", ratio, n_bo_loops);

    //foreach state in the queue
    while (!q.empty()) {
        fsth = q.front();
        q.pop();
        id = h2state[&fsth];
        state2h.push_back(new NeuronFstHistory(fsth));
        if (id == FINAL_STATE) { continue; }

        dprintf(1,"-- STUDY STATE %li = %s\n", id, fsth.toString().c_str());

/*        try { posterior.at(id) = MY_LOG_ONE; }
        catch (exception e) {
            posterior.resize((int) (posterior.size()*1.5)+1);
            posterior.at(id) = MY_LOG_ONE;
        }*/

        computeEntropyAndConditionals(entropy, all_prob, rnnlm, fsth);

        //compute BO in advance and check if it is a min BO node
        bo_fsth = getBackoff(rnnlm, fsth, set_min_backoff, all_prob, to_be_removed);
        if (bo_fsth == fsth) { bo_fsth = min_backoff; }

        //foreach w (ie, foreach word of each class c)
        //test if the edge has to be kept or removed
        backoff = false; //no backoff yet since no edge has been removed
        for (w=0; w < rnnlm.getVocabSize(); w++) {
            p = all_prob[w];

            /*p_joint = exp(-posterior[id]-p);*/
            p_joint = exp(-p);
            delta = -1.0*p_joint*log2(p_joint);

            //accept edge if this leads to a minimum
            //relative gain of the entropy
            dprintf(2,"P = %e \tP_joint = %e \tH = %e \tDelta =%e \tDelta H = %.6f %%\n",
                    exp(-p), p_joint, entropy, delta, 100.0*delta/entropy);

            if (set_min_backoff.find(fsth) != set_min_backoff.end()
                || (delta > pruning_threshold*entropy)) {
//            if ((fsth == min_backoff) || (delta > pruning_threshold*entropy)) {
                next_n_added++;
                to_be_added.push_back(w);
                to_be_added_prob.push_back(p);
                dprintf(2,"\tACCEPT [%li] -- %i (%s) / %f --> ...\t(%e > %e)\n",
                        id, w, rnnlm.getWordString(w), p, delta, pruning_threshold*entropy);
//                to_be_removed.push_back(w);
            }
            //backoff
            else {
//                to_be_removed.push_back(w);
                backoff = true;
                dprintf(2,"\tPRUNE [%li] -- %i / %f --> ...\n", id, w, p);
            }

            //print
            if (next_n_processed % 100000 == 0) {
                fprintf(stderr,
                        "\rH=%.5f / N proc'd=%li / N added=%li (%.5f %%) / N bo=%li (%.5f %%) / %li/%li Nodes (%2.1f %%) / N min BO=%i",
                        entropy, n_processed, n_added,
                        ((float) n_added/ (float)n_processed)*100.0,
                        n_backoff, ((float) n_backoff/ (float)n_added)*100.0,
                        id, id+q.size(),
                        100.0 - (float) (100.0*id/(id+q.size())),
                        (int) set_min_backoff.size());
            }
            next_n_processed++;

//            }
        }

        //Set a part of the new FST history
        new_fsth.setFstHistory(rnnlm, *dzer);

        //if at least one word is backing off
        if (backoff) {
            n_backoff++;
            if (to_be_added.size() == 0) {
                n_only_backoff++;
            }

            if (addFstState(new_id, new NeuronFstHistory(bo_fsth), fst)) {
                q.push(bo_fsth);
                try { non_bo_pred.at(new_id) = false; }
                catch (exception e) {
                    non_bo_pred.resize(new_id+(int) (non_bo_pred.size()*0.5)+1);
                    non_bo_pred.at(new_id) = false;
                }
            }
            dprintf(1,"BACKOFF\t[%li]\t(%s)\n-------\t[%li]\t(%s)\n",
                    id, fsth.toString().c_str(), new_id, bo_fsth.toString().c_str());

            fst.AddArc(id, LogArc(EPSILON, EPSILON, LogWeight::Zero(), new_id));
            addPred(pred, new_id, id);
        }

        vector<real>::iterator it_p = to_be_added_prob.begin();
        for (vector<int>::iterator it = to_be_added.begin();
             it != to_be_added.end(); ++it) {
            w = *it;
            p = *it_p;

            if (w == 0) {
                fst.AddArc(id, LogArc(FstWord(w),FstWord(w),p,FINAL_STATE));
                dprintf(1,"EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] FINAL STATE)\n\n",
                        id, fsth.toString().c_str(), FstWord(w),
                        rnnlm.getWordString(w), p, FINAL_STATE);
            }
            //accept edge
            else {
                new_fsth.setLastWord(w);

                //if sw not in the memory,
                //then add a new state for sw in the FST and push sw in the queue
                if (addFstState(new_id, new NeuronFstHistory(new_fsth), fst)) {
                    q.push(new_fsth);
                    try { non_bo_pred.at(new_id) = true; }
                    catch (exception e) {
                        non_bo_pred.resize(new_id+(int) (non_bo_pred.size()*0.5)+1);
                        non_bo_pred.at(new_id) = true;
                    }
                }
                else { /* already exists */ }

                //add the edge in the FST
                non_bo_pred.at(new_id) = true;
                fst.AddArc(id, LogArc(FstWord(w),FstWord(w),p,new_id));
                dprintf(1,"EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] (%s)\n\n",
                        id, fsth.toString().c_str(), FstWord(w),
                        rnnlm.getWordString(w), p, new_id, new_fsth.toString().c_str());

//                posterior.at(new_id) += posterior[id]*p;
            }

            /*if (posterior[id]+p < LogWeight::Zero().Value()) {
                p_joint = exp(-posterior[id]-p);
                entropy -= p_joint*log2(p_joint);
            }*/

            ++it_p;
        }

        n_added = next_n_added;
        n_processed = next_n_processed;

        //reset queues
        to_be_added.clear();
        to_be_added_prob.clear();
//        to_be_removed.clear();
    }

    cout << endl;

    //compute backoff weights
    deleted = compactBackoffNodes(fst, pred, non_bo_pred);
    computeAllBackoff(fst, pred);

    //remove useless nodes
    removeStates(fst, new_fst, deleted);
    fst.DeleteStates();
    fst = new_fst;

    //Fill the table of symbols
    SymbolTable dic("dictionary");
    dic.AddSymbol("*", 0);
    for (int i=0; i<rnnlm.getVocabSize(); i++) {
        dic.AddSymbol(string(rnnlm.getWordString(i)), i+1);
    }
    fst.SetInputSymbols(&dic);
    fst.SetOutputSymbols(&dic);

//    printf("H=%.5f / N proc'd=%li / N added=%li (%.5f %%) %li/%li Nodes (%2.1f %%)\n", entropy, n_processed, n_added, ((float) n_added/ (float)n_processed)*100.0, id, id+q.size(), 100.0 - (float) (100.0*id/(id+q.size())));

    cout << "END" << endl;
}
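/*
 * Hedged usage sketch (not part of the original source): convertRNN() leaves
 * the machine in the log semiring; a common follow-up is writing it out and
 * converting it to the tropical semiring for decoding.  The helper name, the
 * output file names, and passing an already-constructed FlatBOFstBuilder and
 * CRnnLM are assumptions about the surrounding code.
 */
void BuildAndSaveRnnFst(CRnnLM& rnnlm, FlatBOFstBuilder& builder) {
    VectorFst<LogArc> lm;
    builder.convertRNN(rnnlm, lm);
    lm.Write("rnnlm.log.fst");               //log-semiring machine
    VectorFst<StdArc> std_lm;
    ArcMap(lm, &std_lm, LogToStdMapper());   //switch semirings for decoding
    std_lm.Write("rnnlm.fst");
}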