/*
 * Computes the objective function value at a given weight vector.
 * This should be parallelized by dividing the training examples
 * into subsets and processing each subset on a different thread.
 *
 * @param weights the value of the weights
 * @return the value of the function
 */
double WFST_Trainer_Local::value2(const column_vector &weights) {
    cout << "LIKELIHOOD\n";
    double likelihood = 0.0;
    update_arc_weights(weights);

    // "Filler" FSTs to replace the results from the composition,
    // declared outside the for loop for efficiency.
    VectorFst<LogArc> medial;
    VectorFst<LogArc> final;
    medial.SetInputSymbols(fst->InputSymbols());
    medial.SetOutputSymbols(fst->OutputSymbols());
    final.SetInputSymbols(fst->InputSymbols());
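// A minimal sketch (an assumption, not this project's implementation) of the
// parallelization suggested in the comment above value2(): split the training
// examples across threads with OpenMP and accumulate the per-example
// log-likelihood terms with a reduction. The helper per_example_loglik() and
// the num_examples count are hypothetical placeholders for whatever per-example
// composition value2() actually performs. Compile with -fopenmp.
static double parallel_likelihood_sketch(const column_vector &weights,
                                         int num_examples) {
    double likelihood = 0.0;
    #pragma omp parallel for reduction(+:likelihood)
    for (int i = 0; i < num_examples; ++i) {
        // Each thread handles a disjoint subset of training examples.
        likelihood += per_example_loglik(i, weights);  // hypothetical helper
    }
    return likelihood;
}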
/**
 * Create an FST based on an RNN.
 */
void FlatBOFstBuilder::convertRNN(CRnnLM &rnnlm, VectorFst<LogArc> &fst) {
    queue<NeuronFstHistory> q;
    VectorFst<LogArc> new_fst;

    NeuronFstHistory fsth(rnnlm.getHiddenLayerSize(), getNumBins());
    FstIndex id = 0;

    NeuronFstHistory new_fsth(rnnlm.getHiddenLayerSize(), getNumBins());
    FstIndex new_id;

    NeuronFstHistory min_backoff(rnnlm.getHiddenLayerSize(), getNumBins());
    set<NeuronFstHistory> set_min_backoff;

    NeuronFstHistory bo_fsth(rnnlm.getHiddenLayerSize(), getNumBins());
    bool backoff = false;
    vector<FstIndex> deleted;

    real p = 0.00;
    real p_joint = 0.00;
    real entropy = 0.0;
    real delta = 0.0;
    vector<real> all_prob(rnnlm.getVocabSize());
    vector<real> posterior(10);

    map< FstIndex, set<FstIndex> > pred;
    vector<bool> non_bo_pred(rnnlm.getVocabSize());

    vector<int> to_be_added;
    vector<int> to_be_removed;
    for (int i = 0; i < rnnlm.getVocabSize(); i++) {
        to_be_removed.push_back(i);
    }
    vector<real> to_be_added_prob;

    FstIndex n_added = 0;
    FstIndex n_processed = 0;
    FstIndex next_n_added = 0;
    FstIndex next_n_processed = 0;
    FstIndex n_backoff = 0;
    FstIndex n_only_backoff = 0;

    int v = rnnlm.getVocabSize();
    int w = 0;

    // Initialize
    rnnlm.copyHiddenLayerToInput();
//    printNeurons(rnnlm.getInputLayer(),0,10);

    // Initial state (0 | hidden layer after </s>)
    printNeurons(rnnlm.getHiddenLayer(), 0, 10);
    fsth.setFstHistory(rnnlm, *dzer);
    fsth.setLastWord(0);
    q.push(fsth);
    addFstState(id, new NeuronFstHistory(fsth), fst);
    fst.SetStart(INIT_STATE);

    // Final state (don't care about the associated discrete representation)
    fst.AddState();
    fst.SetFinal(FINAL_STATE, LogWeight::One());

    /*posterior.at(INIT_STATE) = MY_LOG_ONE;*/

    min_backoff.setLastWord(-1);
    computeEntropyAndConditionals(entropy, all_prob, rnnlm, min_backoff);
    min_backoff = getBackoff(rnnlm, min_backoff, set_min_backoff, all_prob, to_be_removed);
    cout << "MIN BACKOFF " << min_backoff.toString() << endl;
    set_min_backoff.insert(min_backoff);

//    addFstState(id, min_backoff, fst);
//    q.push(min_backoff);

    // Estimate the number of backoff loops to bound the backoff path length
//    float ratioa = 0.0;
//    float ratiob = 0.0;
    float ratio = 0.0;
//    for (int i=0; i < min_backoff.getNumDims(); i++) {
//        if (min_backoff.getDim(i) == 1) {
//            ratioa++;
//        }
//        if (fsth.getDim(i) == 1) {
//            ratiob++;
//        }
//    }
//    ratioa /= min_backoff.getNumDims();
//    ratiob /= min_backoff.getNumDims();
//    ratio = (ratioa*(1.0-ratiob))+(ratiob*(1.0-ratioa));
    ratio = 1.0;
//    printf("ratio=%f\t%i BO loops\n", ratio, n_bo_loops);

    // For each state in the queue
    while (!q.empty()) {
        fsth = q.front();
        q.pop();
        id = h2state[&fsth];
        state2h.push_back(new NeuronFstHistory(fsth));
        if (id == FINAL_STATE) {
            continue;
        }

        dprintf(1, "-- STUDY STATE %li = %s\n", id, fsth.toString().c_str());

        /* try {
            posterior.at(id) = MY_LOG_ONE;
        } catch (exception e) {
            posterior.resize((int) (posterior.size()*1.5)+1);
            posterior.at(id) = MY_LOG_ONE;
        } */

        computeEntropyAndConditionals(entropy, all_prob, rnnlm, fsth);

        // Compute the backoff in advance and check whether it is a min backoff node
        bo_fsth = getBackoff(rnnlm, fsth, set_min_backoff, all_prob, to_be_removed);
        if (bo_fsth == fsth) {
            bo_fsth = min_backoff;
        }

        // For each w (i.e., for each word of each class c),
        // test whether the edge has to be kept or removed
        backoff = false;  // no backoff yet since no edge has been removed
        for (w = 0; w < rnnlm.getVocabSize(); w++) {
            p = all_prob[w];

            /*p_joint = exp(-posterior[id]-p);*/
            p_joint = exp(-p);
            delta = -1.0 * p_joint * log2(p_joint);

            // Accept the edge if this leads to a minimum relative gain of the entropy
            dprintf(2, "P = %e \tP_joint = %e \tH = %e \tDelta =%e \tDelta H = %.6f %%\n",
                    exp(-p), p_joint, entropy, delta, 100.0*delta/entropy);

            if (set_min_backoff.find(fsth) != set_min_backoff.end()
                || (delta > pruning_threshold*entropy)) {
//            if ((fsth == min_backoff) || (delta > pruning_threshold*entropy)) {
                next_n_added++;
                to_be_added.push_back(w);
                to_be_added_prob.push_back(p);
                dprintf(2, "\tACCEPT [%li] -- %i (%s) / %f --> ...\t(%e > %e)\n",
                        id, w, rnnlm.getWordString(w), p, delta,
                        pruning_threshold*entropy);
//                to_be_removed.push_back(w);
            }
            // Backoff
            else {
//                to_be_removed.push_back(w);
                backoff = true;
                dprintf(2, "\tPRUNE [%li] -- %i / %f --> ...\n", id, w, p);
            }

            // Print progress
            if (next_n_processed % 100000 == 0) {
                fprintf(stderr, "\rH=%.5f / N proc'd=%li / N added=%li (%.5f %%) / N bo=%li (%.5f %%) / %li/%li Nodes (%2.1f %%) / N min BO=%i",
                        entropy, n_processed, n_added,
                        ((float) n_added / (float) n_processed)*100.0,
                        n_backoff,
                        ((float) n_backoff / (float) n_added)*100.0,
                        id, id+q.size(),
                        100.0 - (float) (100.0*id/(id+q.size())),
                        (int) set_min_backoff.size());
            }
            next_n_processed++;
//        }
        }

        // Set a part of the new FST history
        new_fsth.setFstHistory(rnnlm, *dzer);

        // If at least one word is backing off
        if (backoff) {
            n_backoff++;
            if (to_be_added.size() == 0) {
                n_only_backoff++;
            }

            if (addFstState(new_id, new NeuronFstHistory(bo_fsth), fst)) {
                q.push(bo_fsth);
                try {
                    non_bo_pred.at(new_id) = false;
                } catch (exception e) {
                    non_bo_pred.resize(new_id + (int) (non_bo_pred.size()*0.5) + 1);
                    non_bo_pred.at(new_id) = false;
                }
            }

            dprintf(1, "BACKOFF\t[%li]\t(%s)\n-------\t[%li]\t(%s)\n",
                    id, fsth.toString().c_str(), new_id, bo_fsth.toString().c_str());

            fst.AddArc(id, LogArc(EPSILON, EPSILON, LogWeight::Zero(), new_id));
            addPred(pred, new_id, id);
        }

        vector<real>::iterator it_p = to_be_added_prob.begin();
        for (vector<int>::iterator it = to_be_added.begin();
             it != to_be_added.end(); ++it) {
            w = *it;
            p = *it_p;

            if (w == 0) {
                fst.AddArc(id, LogArc(FstWord(w), FstWord(w), p, FINAL_STATE));
                dprintf(1, "EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] FINAL STATE)\n\n",
                        id, fsth.toString().c_str(), FstWord(w),
                        rnnlm.getWordString(w), p, FINAL_STATE);
            }
            // Accept edge
            else {
                new_fsth.setLastWord(w);

                // If sw is not in the memory, then add a new state for sw
                // in the FST and push sw onto the queue
                if (addFstState(new_id, new NeuronFstHistory(new_fsth), fst)) {
                    q.push(new_fsth);
                    try {
                        non_bo_pred.at(new_id) = true;
                    } catch (exception e) {
                        non_bo_pred.resize(new_id + (int) (non_bo_pred.size()*0.5) + 1);
                        non_bo_pred.at(new_id) = true;
                    }
                } else { /* already exists */ }

                // Add the edge in the FST
                non_bo_pred.at(new_id) = true;
                fst.AddArc(id, LogArc(FstWord(w), FstWord(w), p, new_id));
                dprintf(1, "EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] (%s)\n\n",
                        id, fsth.toString().c_str(), FstWord(w),
                        rnnlm.getWordString(w), p, new_id,
                        new_fsth.toString().c_str());

//                posterior.at(new_id) += posterior[id]*p;
            }

            /*if (posterior[id]+p < LogWeight::Zero().Value()) {
                p_joint = exp(-posterior[id]-p);
                entropy -= p_joint*log2(p_joint);
            }*/

            ++it_p;
        }

        n_added = next_n_added;
        n_processed = next_n_processed;

        // Reset queues
        to_be_added.clear();
        to_be_added_prob.clear();
//        to_be_removed.clear();
    }

    cout << endl;

    // Compute backoff weights
    deleted = compactBackoffNodes(fst, pred, non_bo_pred);
    computeAllBackoff(fst, pred);

    // Remove useless nodes
    removeStates(fst, new_fst, deleted);
    fst.DeleteStates();
    fst = new_fst;

    // Fill the table of symbols
    SymbolTable dic("dictionnary");
    dic.AddSymbol("*", 0);
    for (int i = 0; i < rnnlm.getVocabSize(); i++) {
        dic.AddSymbol(string(rnnlm.getWordString(i)), i+1);
    }
    fst.SetInputSymbols(&dic);
    fst.SetOutputSymbols(&dic);

//    printf("H=%.5f / N proc'd=%li / N added=%li (%.5f %%) %li/%li Nodes (%2.1f %%)\n",
//           entropy, n_processed, n_added,
//           ((float) n_added/ (float)n_processed)*100.0,
//           id, id+q.size(), 100.0 - (float) (100.0*id/(id+q.size())));

    cout << "END" << endl;
}
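// A minimal usage sketch (an assumption, not code from this project): it relies
// only on calls visible above -- convertRNN() and OpenFst's VectorFst::Write().
// How the CRnnLM model and the FlatBOFstBuilder are constructed and loaded is
// left to the caller, since those APIs are not shown here; the output filename
// is an illustrative placeholder.
void build_rnn_fst_sketch(FlatBOFstBuilder &builder, CRnnLM &rnnlm) {
    VectorFst<LogArc> rnn_fst;
    builder.convertRNN(rnnlm, rnn_fst);  // discretized RNN states become FST states
    rnn_fst.Write("rnnlm.fst");          // symbol tables were attached in convertRNN()
}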
void train_model(string eps, string s1s2_sep, string skip, int order,
                 string smooth, string prefix, string seq_sep, string prune,
                 double theta, string count_pattern) {
    namespace s = fst::script;
    using fst::script::FstClass;
    using fst::script::MutableFstClass;
    using fst::script::VectorFstClass;
    using fst::script::WeightClass;

    // Create the symbols file
    cout << "Generating symbols..." << endl;
    NGramInput *ingram = new NGramInput(prefix + ".corpus.aligned",
                                        prefix + ".corpus.syms",
                                        "", eps, unknown_symbol, "", "");
    ingram->ReadInput(0, 1);

    // Compile the strings into a FAR archive
    cout << "Compiling symbols into FAR archive..." << endl;
    fst::FarEntryType fet = fst::StringToFarEntryType(entry_type);
    fst::FarTokenType ftt = fst::StringToFarTokenType(token_type);
    fst::FarType fartype = fst::FarTypeFromString(far_type);
    delete ingram;

    vector<string> in_fname;
    in_fname.push_back(prefix + ".corpus.aligned");
    fst::script::FarCompileStrings(in_fname, prefix + ".corpus.far",
                                   arc_type, fst_type, fartype, generate_keys,
                                   fet, ftt, prefix + ".corpus.syms",
                                   unknown_symbol, keep_symbols, initial_symbols,
                                   allow_negative_labels, file_list_input,
                                   key_prefix, key_suffix);

    // Count n-grams
    cout << "Counting n-grams..." << endl;
    NGramCounter<Log64Weight> ngram_counter(order, epsilon_as_backoff);

    FstReadOptions opts;
    FarReader<StdArc> *far_reader;
    far_reader = FarReader<StdArc>::Open(prefix + ".corpus.far");

    int fstnumber = 1;
    const Fst<StdArc> *ifst = 0, *lfst = 0;
    while (!far_reader->Done()) {
        if (ifst)
            delete ifst;
        ifst = far_reader->GetFst().Copy();

        if (!ifst) {
            E_FATAL("ngramcount: unable to read fst #%d\n", fstnumber);
            //exit(1);
        }

        bool counted = false;
        if (ifst->Properties(kString | kUnweighted, true)) {
            counted = ngram_counter.Count(*ifst);
        } else {
            VectorFst<Log64Arc> log_ifst;
            Map(*ifst, &log_ifst, ToLog64Mapper<StdArc>());
            counted = ngram_counter.Count(&log_ifst);
        }
        if (!counted)
            cout << "ngramcount: fst #" << fstnumber << " not counted" << endl;

        if (ifst->InputSymbols() != 0) {  // retain for the symbol table
            if (lfst)
                delete lfst;  // delete the previously observed symbol table
            lfst = ifst;
            ifst = 0;
        }
        far_reader->Next();
        ++fstnumber;
    }
    delete far_reader;

    if (!lfst) {
        E_FATAL("None of the input FSTs had a symbol table\n");
        //exit(1);
    }

    VectorFst<StdArc> vfst;
    ngram_counter.GetFst(&vfst);
    ArcSort(&vfst, StdILabelCompare());
    vfst.SetInputSymbols(lfst->InputSymbols());
    vfst.SetOutputSymbols(lfst->InputSymbols());
    vfst.Write(prefix + ".corpus.cnts");

    StdMutableFst *fst = StdMutableFst::Read(prefix + ".corpus.cnts", true);

    if (smooth != "no") {
        cout << "Smoothing model..." << endl;

        bool prefix_norm = 0;
        if (smooth == "presmoothed") {  // only for use with randgen counts
            prefix_norm = 1;
            smooth = "unsmoothed";      // normalizes only based on prefix counts
        }

        if (smooth == "kneser_ney") {
            NGramKneserNey ngram(fst, backoff, backoff_label, norm_eps,
                                 check_consistency, discount_D, bins);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        } else if (smooth == "absolute") {
            NGramAbsolute ngram(fst, backoff, backoff_label, norm_eps,
                                check_consistency, discount_D, bins);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        } else if (smooth == "katz") {
            NGramKatz ngram(fst, backoff, backoff_label, norm_eps,
                            check_consistency, bins);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        } else if (smooth == "witten_bell") {
            NGramWittenBell ngram(fst, backoff, backoff_label, norm_eps,
                                  check_consistency, witten_bell_k);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        } else if (smooth == "unsmoothed") {
            NGramUnsmoothed ngram(fst, 1, prefix_norm, backoff_label, norm_eps,
                                  check_consistency);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        } else {
            E_FATAL("Bad smoothing method: %s\n", smooth.c_str());
        }
    }

    if (prune != "no") {
        cout << "Pruning model..." << endl;

        if (prune == "count_prune") {
            NGramCountPrune ngramsh(fst, count_pattern, shrink_opt,
                                    total_unigram_count, backoff_label,
                                    norm_eps, check_consistency);
            ngramsh.ShrinkNGramModel();
        } else if (prune == "relative_entropy") {
            NGramRelEntropy ngramsh(fst, theta, shrink_opt, total_unigram_count,
                                    backoff_label, norm_eps, check_consistency);
            ngramsh.ShrinkNGramModel();
        } else if (prune == "seymore") {
            NGramSeymoreShrink ngramsh(fst, theta, shrink_opt,
                                       total_unigram_count, backoff_label,
                                       norm_eps, check_consistency);
            ngramsh.ShrinkNGramModel();
        } else {
            E_FATAL("Bad shrink method: %s\n", prune.c_str());
        }
    }

    cout << "Minimizing model..." << endl;
    MutableFstClass *minimized = new s::MutableFstClass(*fst);
    Minimize(minimized, 0, fst::kDelta);
    fst = minimized->GetMutableFst<StdArc>();

    cout << "Correcting final model..." << endl;
    StdMutableFst *out = new StdVectorFst();
    relabel(fst, out, prefix, eps, skip, s1s2_sep, seq_sep);

    cout << "Writing binary model to disk..." << endl;
    out->Write(prefix + ".fst");
}
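// A hypothetical invocation sketch of train_model(); the argument values below
// are illustrative placeholders, not defaults taken from this tool. It assumes
// the "<prefix>.corpus.aligned" file already exists, since train_model() reads
// "<prefix>.corpus.aligned" and writes "<prefix>.fst" as shown above.
void train_model_example() {
    train_model("<eps>",       // eps: epsilon symbol
                "}",           // s1s2_sep: separator between the two sides of a joint token
                "_",           // skip: skip/deletion symbol
                6,             // order: n-gram order
                "kneser_ney",  // smooth: one of the smoothing methods handled above
                "model",       // prefix: model.corpus.* files are read, model.fst is written
                "|",           // seq_sep: sequence separator
                "no",          // prune: disable pruning
                0.0,           // theta: pruning threshold (unused when prune == "no")
                "");           // count_pattern: only used by count_prune
}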