C++ (Cpp) VectorFst::SetInputSymbols Examples

Programming Language: C++ (Cpp)

Class/Type: VectorFst

Method/Function: SetInputSymbols

Examples at hotexamples.com: 4

C++ (Cpp) VectorFst::SetInputSymbols - 4 examples found. These are the top rated real world C++ (Cpp) examples of VectorFst::SetInputSymbols extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Write(6)

AddArc(5)

AddState(5)

SetFinal(5)

SetStart(5)

NumStates(4)

SetInputSymbols(4)

SetOutputSymbols(3)

Final(2)

InputSymbols(2)

DeleteStates(1)

Start(1)

Example #1

Show file

File: M2MFstAligner.cpp Project: ShahAlay/sphinxtrain

/**
 * Write the alignment model to disk as a one-state FST.
 *
 * The FST has a single state that is both the start state and a final state
 * (final weight LogWeight::One()). Every entry of `alignment_model` becomes a
 * self-loop on that state whose input and output label are the entry's key
 * and whose weight is the entry's value. `isyms` is attached as the input
 * symbol table before writing.
 *
 * @param _model_file  Path handed to VectorFst::Write. The result of the
 *                     write is not inspected.
 */
void
M2MFstAligner::write_model(string _model_file)
{
    // The only state of the model: start, final and endpoint of every arc.
    const int hub = 0;
    typedef map<LogArc::Label, LogWeight>::iterator ModelIter;

    VectorFst<LogArc> model;
    model.AddState();
    model.SetStart(hub);
    model.SetFinal(hub, LogWeight::One());

    for (ModelIter entry = alignment_model.begin();
         entry != alignment_model.end(); ++entry) {
        const LogArc self_loop(entry->first, entry->first, entry->second, hub);
        model.AddArc(hub, self_loop);
    }

    model.SetInputSymbols(isyms);
    model.Write(_model_file);
}

Example #2

Show file

File: wfst-train-local-fast.cpp Project: fgregg/stochastic_edit_acl_2014

/*
 * This computes the function value at a given weight vector.
 * This should be parallelized by dividing the training examples
 * into subsets and doing each on a different thread.
 *
 * @param weights - the value of the weights
 * @return the value of the function
 *
 */
double WFST_Trainer_Local::value2(const column_vector &weights) {
  
  cout << "LIKELIHOOD\n";
  double likelihood = 0.0;
  update_arc_weights(weights);

  // ``filler'' FSTs to replace
  // the results from the composition
  // declared outside for loop for efficiency
  VectorFst<LogArc> medial;
  VectorFst<LogArc> final;
  
  medial.SetInputSymbols(fst->InputSymbols());
  medial.SetOutputSymbols(fst->OutputSymbols());
  final.SetInputSymbols(fst->InputSymbols());

Example #3

Show file

File: flat_bo_fstbuilder.cpp Project: glecorve/rnnlm2wfst

/**
 * Create an FST based on an RNN.
 *
 * Histories are explored in FIFO order starting from an initial history built
 * from the RNN's hidden layer. For every dequeued history, each vocabulary
 * word is either kept as an explicit arc or pruned; if at least one word is
 * pruned, an EPSILON arc towards a back-off history is added. Once the queue
 * is empty, back-off nodes are compacted, back-off weights are computed, the
 * states returned by compactBackoffNodes() are removed, and a symbol table is
 * attached to the resulting FST.
 *
 * @param rnnlm  RNN language model; queried for vocabulary size, word strings
 *               and hidden layer, and passed to the helper routines.
 * @param fst    FST that receives the states and arcs; it is overwritten with
 *               the cleaned-up copy (`new_fst`) before the symbols are set.
 */
void FlatBOFstBuilder::convertRNN(CRnnLM & rnnlm, VectorFst<LogArc> &fst) {
	// Work queue of histories still to be expanded.
	queue<NeuronFstHistory> q;
	// Receives the FST without the deleted states (see removeStates below).
	VectorFst<LogArc> new_fst;
	
	// History currently being expanded and its state id.
	NeuronFstHistory fsth(rnnlm.getHiddenLayerSize(),getNumBins());
	FstIndex id = 0;
	
	// Candidate successor history and its state id (filled by addFstState).
	NeuronFstHistory new_fsth(rnnlm.getHiddenLayerSize(),getNumBins());
	FstIndex new_id;

	NeuronFstHistory min_backoff(rnnlm.getHiddenLayerSize(),getNumBins());
	set<NeuronFstHistory>set_min_backoff;
	
	// Back-off history of the current history.
	NeuronFstHistory bo_fsth(rnnlm.getHiddenLayerSize(),getNumBins());
	bool backoff = false;
	vector<FstIndex> deleted;


	real p = 0.00;
	real p_joint = 0.00;
	real entropy = 0.0;
	real delta = 0.0;
	vector<real> all_prob(rnnlm.getVocabSize());
 	vector<real> posterior(10);
	
	map< FstIndex,set<FstIndex> > pred;
	vector<bool> non_bo_pred(rnnlm.getVocabSize());
	// Words (and their scores) accepted for the current history.
	vector<int> to_be_added;
	// Filled once with every word index and passed unchanged to getBackoff().
	vector<int> to_be_removed;
	for (int i = 0; i < rnnlm.getVocabSize(); i++) {
		to_be_removed.push_back(i);
	}
	vector<real> to_be_added_prob;


	// Progress counters; the next_* values are committed to their
	// counterparts at the end of each queue iteration.
 	FstIndex n_added = 0;
 	FstIndex n_processed = 0;
 	FstIndex next_n_added = 0;
 	FstIndex next_n_processed = 0;
 	FstIndex n_backoff = 0;
 	FstIndex n_only_backoff = 0;
 	
	int v = rnnlm.getVocabSize();
	int w = 0;


	// Initialize
	rnnlm.copyHiddenLayerToInput();
//	printNeurons(rnnlm.getInputLayer(),0,10);

	// Initial state ( 0 | hidden layer after </s>)
	printNeurons(rnnlm.getHiddenLayer(),0,10);
	fsth.setFstHistory(rnnlm, *dzer);
	fsth.setLastWord(0);
	q.push(fsth);
	addFstState(id, new NeuronFstHistory(fsth), fst);
	fst.SetStart(INIT_STATE);
	
	// Final state (don't care about the associated discrete representation)
	fst.AddState();
	fst.SetFinal(FINAL_STATE, LogWeight::One());
	
 	/*posterior.at(INIT_STATE) = MY_LOG_ONE;*/
	// Derive the minimal back-off history from a history whose last word is -1.
	min_backoff.setLastWord(-1);
	computeEntropyAndConditionals(entropy, all_prob, rnnlm, min_backoff);
	min_backoff = getBackoff(rnnlm, min_backoff, set_min_backoff, all_prob, to_be_removed);
	cout << "MIN BACKOFF " << min_backoff.toString() << endl;
	set_min_backoff.insert(min_backoff);
	
//	addFstState(id, min_backoff, fst);
//	q.push(min_backoff);
	

	
	// Estimate number of backoff loop to bound the backoff path length
	// (the estimation is disabled; `ratio` is fixed to 1.0 and not read again
	// in this function)
// 	float ratioa = 0.0;
// 	float ratiob = 0.0;
	float ratio = 0.0;
// 	for (int i=0; i < min_backoff.getNumDims(); i++) {
// 		if (min_backoff.getDim(i) == 1) {
// 			ratioa++;
// 		}
// 		if (fsth.getDim(i) == 1) {
// 			ratiob++;
// 		}
// 	}
// 	ratioa /= min_backoff.getNumDims();
// 	ratiob /= min_backoff.getNumDims();
// 	ratio = (ratioa*(1.0-ratiob))+(ratiob*(1.0-ratioa));
	ratio=1.0;

//	printf("ratio=%f\t%i BO loops\n", ratio, n_bo_loops);
	
	
	
	//foreach state in the queue
	while (!q.empty()) {
		fsth = q.front();
		q.pop();
		// NOTE(review): the lookup key is the address of the local `fsth`;
		// presumably h2state compares the pointed-to histories rather than
		// the pointers — confirm against its declaration.
		id = h2state[&fsth];
		state2h.push_back(new NeuronFstHistory(fsth));
		// The final state is never expanded.
		if (id == FINAL_STATE) { continue; }


		
		
	dprintf(1,"-- STUDY STATE %li = %s\n", id, fsth.toString().c_str());
	

/*		try { posterior.at(id) = MY_LOG_ONE; }
		catch (exception e) {
			posterior.resize((int) (posterior.size()*1.5)+1);
			posterior.at(id) = MY_LOG_ONE;
		}*/
		
		computeEntropyAndConditionals(entropy, all_prob, rnnlm, fsth);
		
		//compute BO in advance and check if it is a min BO node
		bo_fsth = getBackoff(rnnlm, fsth, set_min_backoff, all_prob, to_be_removed);
		// A history that backs off to itself falls back to the minimal back-off.
		if (bo_fsth == fsth) { bo_fsth = min_backoff; }
			
		//foreach w (ie, foreach word of each class c)
		//test if the edge has to kept or removed
		backoff = false; //no backoff yet since no edge has been removed
		for (w=0; w < rnnlm.getVocabSize(); w++) {
				// p is converted back with exp(-p), i.e. it is used as a
				// negative natural-log score.
				p = all_prob[w];
				
				/*p_joint = exp(-posterior[id]-p);*/
				p_joint = exp(-p);
				// Contribution of this word: -P * log2(P).
				delta = -1.0*p_joint*log2(p_joint);
				
				//accept edge if this leads to a minimum
				//relative gain of the entropy

				dprintf(2,"P = %e \tP_joint = %e \tH = %e \tDelta =%e \tDelta H = %.6f %%\n",exp(-p), p_joint, entropy, delta, 100.0*delta/entropy);

				// Histories in set_min_backoff keep every word; other
				// histories keep a word only when its contribution exceeds
				// pruning_threshold * entropy.
				if (set_min_backoff.find(fsth) != set_min_backoff.end() || (delta > pruning_threshold*entropy)) {
//				if ((fsth == min_backoff) || (delta > pruning_threshold*entropy)) {
					next_n_added++;
					to_be_added.push_back(w);
					to_be_added_prob.push_back(p);
					dprintf(2,"\tACCEPT [%li] -- %i (%s) / %f --> ...\t(%e > %e)\n", id, w, rnnlm.getWordString(w), p, delta, pruning_threshold*entropy);
//					to_be_removed.push_back(w);
 				}
 				//backoff
				else {
//					to_be_removed.push_back(w);
					backoff = true;
					dprintf(2,"\tPRUNE [%li] -- %i / %f --> ...\n", id, w, p);
 				}
 				
 				//print
				// Progress line every 100000 processed (state, word) pairs.
				// n_processed / n_added are only refreshed at the end of each
				// queue iteration, so the ratios lag behind the next_* counters.
				if (next_n_processed % 100000 == 0) {
						fprintf(stderr, "\rH=%.5f / N proc'd=%li / N added=%li (%.5f %%) / N bo=%li (%.5f %%) / %li/%li Nodes (%2.1f %%) / N min BO=%i", entropy, n_processed, n_added, ((float) n_added/ (float)n_processed)*100.0, n_backoff, ((float) n_backoff/ (float)n_added)*100.0, id, id+q.size(), 100.0 - (float) (100.0*id/(id+q.size())), (int) set_min_backoff.size());
				}
				next_n_processed++;
 				
//			}
		}


		//Set a part of the new FST history
		new_fsth.setFstHistory(rnnlm, *dzer);

		//if at least one word is backing off
		if (backoff) {
			
			n_backoff++;
			if (to_be_added.size() == 0) {
				n_only_backoff++;
			}
			
			
			// A true return value is treated as "state newly created": the
			// back-off history is queued and flagged as having no non-back-off
			// predecessor yet.
			if (addFstState(new_id, new NeuronFstHistory(bo_fsth), fst)) {
				q.push(bo_fsth);
				// at() throws when new_id is past the end; the handler grows
				// the vector and retries.
				// NOTE(review): the exception is caught by value.
				try { non_bo_pred.at(new_id) = false; }
				catch (exception e) {
					non_bo_pred.resize(new_id+(int) (non_bo_pred.size()*0.5)+1);
					non_bo_pred.at(new_id) = false;
				}
				
			}
			dprintf(1,"BACKOFF\t[%li]\t(%s)\n-------\t[%li]\t(%s)\n", id, fsth.toString().c_str(), new_id, bo_fsth.toString().c_str());

			// Back-off arc with weight LogWeight::Zero(); presumably a
			// placeholder that computeAllBackoff() replaces — TODO confirm.
			fst.AddArc(id, LogArc(EPSILON, EPSILON, LogWeight::Zero(), new_id));
			
			addPred(pred, new_id, id);
			
		}
		
		
		// to_be_added and to_be_added_prob are filled in lockstep above, so
		// the two iterators advance together.
		vector<real>::iterator it_p = to_be_added_prob.begin();
		for (vector<int>::iterator it = to_be_added.begin(); it != to_be_added.end(); ++it) {
			w = *it;
			p = *it_p;

			// Word 0 leads directly to the final state.
			if (w == 0) {
				fst.AddArc(id, LogArc(FstWord(w),FstWord(w),p,FINAL_STATE));
				dprintf(1,"EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] FINAL STATE)\n\n", id, fsth.toString().c_str(), FstWord(w), rnnlm.getWordString(w), p, FINAL_STATE);				
			}
		
			//accept edge
			else {
				new_fsth.setLastWord(w);
	
				//if sw not in the memory
				//then add a new state for sw in the FST and push sw in the queue
				if (addFstState(new_id, new NeuronFstHistory(new_fsth), fst)) {
					q.push(new_fsth);
					// Same grow-on-out_of_range pattern as for back-off states.
					try { non_bo_pred.at(new_id) = true; }
					catch (exception e) {
						non_bo_pred.resize(new_id+(int) (non_bo_pred.size()*0.5)+1);
						non_bo_pred.at(new_id) = true;
					}
				}
				else { /* already exists */ }
			
				//add the edge in the FST
				// The target now has at least one non-back-off predecessor,
				// whether it was just created or already existed.
				non_bo_pred.at(new_id) = true;
				fst.AddArc(id, LogArc(FstWord(w),FstWord(w),p,new_id));
				dprintf(1,"EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] (%s)\n\n", id, fsth.toString().c_str(), FstWord(w), rnnlm.getWordString(w), p, new_id, new_fsth.toString().c_str());				

//				posterior.at(new_id) += posterior[id]*p;

			}
			
			/*if (posterior[id]+p < LogWeight::Zero().Value()) {
				p_joint = exp(-posterior[id]-p);
				entropy -= p_joint*log2(p_joint);
			}*/
			
			++it_p;
		}
		
		// Commit the counters accumulated while expanding this history.
		n_added = next_n_added;
		n_processed = next_n_processed;
		
		//reset queues
		to_be_added.clear();
		to_be_added_prob.clear();
//		to_be_removed.clear();
		
	}

	// Terminates the "\r"-refreshed progress line written to stderr above.
	cout << endl;
	
	//compute backoff weights
	deleted = compactBackoffNodes(fst, pred, non_bo_pred);
	computeAllBackoff(fst, pred);


	//remove useless nodes
	removeStates(fst, new_fst, deleted);
	fst.DeleteStates();
	fst = new_fst;
	
	//Fill the table of symbols
	// "*" takes id 0 and RNN word i takes id i+1.
	// NOTE(review): `dic` is a local and only its address is handed to the
	// FST; this is safe only if SetInputSymbols/SetOutputSymbols store their
	// own copy of the table — confirm for the OpenFst version in use.
	SymbolTable dic("dictionnary");
	dic.AddSymbol("*", 0);
	for (int i=0; i<rnnlm.getVocabSize(); i++) {
		dic.AddSymbol(string(rnnlm.getWordString(i)), i+1);
	}
	fst.SetInputSymbols(&dic);
	fst.SetOutputSymbols(&dic);

						//printf("H=%.5f / N proc'd=%li / N added=%li (%.5f %%) %li/%li Nodes (%2.1f %%)\n", entropy, n_processed, n_added, ((float) n_added/ (float)n_processed)*100.0, id, id+q.size(), 100.0 - (float) (100.0*id/(id+q.size())));
	cout << "END" << endl;
	
}

Example #4

Show file

File: g2p_train.cpp Project: ShahAlay/sphinxtrain

/**
 * Train an n-gram model from an aligned corpus and write it to disk.
 *
 * Steps, in order:
 *   1. generate the symbols file `prefix.corpus.syms` from
 *      `prefix.corpus.aligned`;
 *   2. compile the aligned corpus into the FAR archive `prefix.corpus.far`;
 *   3. count n-grams of the given order over every FST of the archive and
 *      write the counts to `prefix.corpus.cnts`;
 *   4. optionally smooth the counts (`smooth`);
 *   5. optionally prune the model (`prune`);
 *   6. minimize, relabel and write the result to `prefix.fst`.
 *
 * Unrecoverable conditions are reported through E_FATAL.
 *
 * @param eps            Epsilon symbol, forwarded to NGramInput and relabel().
 * @param s1s2_sep       Separator forwarded to relabel().
 * @param skip           Skip symbol forwarded to relabel().
 * @param order          N-gram order given to the counter.
 * @param smooth         "no", "presmoothed", "kneser_ney", "absolute",
 *                       "katz", "witten_bell" or "unsmoothed".
 * @param prefix         Common prefix of every file read or written.
 * @param seq_sep        Separator forwarded to relabel().
 * @param prune          "no", "count_prune", "relative_entropy" or "seymore".
 * @param theta          Threshold for the relative-entropy and Seymore
 *                       shrinking methods.
 * @param count_pattern  Pattern for the count-pruning method.
 */
void
train_model(string eps, string s1s2_sep, string skip, int order,
            string smooth, string prefix, string seq_sep, string prune,
            double theta, string count_pattern)
{
    namespace s = fst::script;
    using fst::script::FstClass;
    using fst::script::MutableFstClass;
    using fst::script::VectorFstClass;
    using fst::script::WeightClass;

    const string far_file = prefix + ".corpus.far";
    const string counts_file = prefix + ".corpus.cnts";

    // create symbols file
    cout << "Generating symbols..." << endl;
    NGramInput *ingram =
        new NGramInput(prefix + ".corpus.aligned", prefix + ".corpus.syms",
                       "", eps, unknown_symbol, "", "");
    ingram->ReadInput(0, 1);

    // compile strings into a far archive
    cout << "Compiling symbols into FAR archive..." << endl;
    fst::FarEntryType fet = fst::StringToFarEntryType(entry_type);
    fst::FarTokenType ftt = fst::StringToFarTokenType(token_type);
    fst::FarType fartype = fst::FarTypeFromString(far_type);

    delete ingram;

    vector<string> in_fname;
    in_fname.push_back(prefix + ".corpus.aligned");

    fst::script::FarCompileStrings(in_fname, far_file,
                                   arc_type, fst_type, fartype,
                                   generate_keys, fet, ftt,
                                   prefix + ".corpus.syms", unknown_symbol,
                                   keep_symbols, initial_symbols,
                                   allow_negative_labels, file_list_input,
                                   key_prefix, key_suffix);

    //count n-grams
    cout << "Counting n-grams..." << endl;
    NGramCounter<Log64Weight> ngram_counter(order, epsilon_as_backoff);

    FarReader<StdArc> *far_reader = FarReader<StdArc>::Open(far_file);
    if (!far_reader) {
        // Open() yields a null pointer on failure; dereferencing it below
        // would crash instead of reporting the problem.
        E_FATAL("Unable to open FAR archive %s\n", far_file.c_str());
    }

    int fstnumber = 1;
    // ifst: FST of the current iteration (owned here).
    // lfst: most recent FST that carried an input symbol table (owned here).
    const Fst<StdArc> *ifst = 0, *lfst = 0;
    while (!far_reader->Done()) {
        if (ifst)
            delete ifst;
        ifst = far_reader->GetFst().Copy();

        if (!ifst) {
            E_FATAL("ngramcount: unable to read fst #%d\n", fstnumber);
            //exit(1);
        }

        bool counted = false;
        if (ifst->Properties(kString | kUnweighted, true)) {
            counted = ngram_counter.Count(*ifst);
        }
        else {
            // Weighted or non-string input is counted through a Log64 copy.
            VectorFst<Log64Arc> log_ifst;
            Map(*ifst, &log_ifst, ToLog64Mapper<StdArc> ());
            counted = ngram_counter.Count(&log_ifst);
        }
        if (!counted)
            cout << "ngramcount: fst #" << fstnumber << endl;

        if (ifst->InputSymbols() != 0) {        // retain for symbol table
            if (lfst)
                delete lfst;    // delete previously observed symbol table
            lfst = ifst;
            ifst = 0;
        }
        far_reader->Next();
        ++fstnumber;
    }
    delete far_reader;

    // The last FST read is still owned here when it had no symbol table.
    delete ifst;
    ifst = 0;

    if (!lfst) {
        E_FATAL("None of the input FSTs had a symbol table\n");
        //exit(1);
    }

    VectorFst<StdArc> vfst;
    ngram_counter.GetFst(&vfst);
    ArcSort(&vfst, StdILabelCompare());
    vfst.SetInputSymbols(lfst->InputSymbols());
    vfst.SetOutputSymbols(lfst->InputSymbols());
    // vfst keeps its own copy of the symbol table (OpenFst
    // MutableFst::Set{Input,Output}Symbols), so the donor FST can go.
    delete lfst;
    lfst = 0;

    vfst.Write(counts_file);
    StdMutableFst *fst = StdMutableFst::Read(counts_file, true);
    if (!fst) {
        // Read() yields a null pointer when the file cannot be loaded.
        E_FATAL("Unable to read n-gram counts from %s\n",
                counts_file.c_str());
    }

    if (smooth != "no") {
        cout << "Smoothing model..." << endl;

        bool prefix_norm = false;
        if (smooth == "presmoothed") {  // only for use with randgen counts
            prefix_norm = true;
            smooth = "unsmoothed";      // normalizes only based on prefix count
        }
        if (smooth == "kneser_ney") {
            NGramKneserNey ngram(fst, backoff, backoff_label,
                                 norm_eps, check_consistency,
                                 discount_D, bins);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        }
        else if (smooth == "absolute") {
            NGramAbsolute ngram(fst, backoff, backoff_label,
                                norm_eps, check_consistency,
                                discount_D, bins);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        }
        else if (smooth == "katz") {
            NGramKatz ngram(fst, backoff, backoff_label,
                            norm_eps, check_consistency, bins);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        }
        else if (smooth == "witten_bell") {
            NGramWittenBell ngram(fst, backoff, backoff_label,
                                  norm_eps, check_consistency,
                                  witten_bell_k);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        }
        else if (smooth == "unsmoothed") {
            NGramUnsmoothed ngram(fst, 1, prefix_norm, backoff_label,
                                  norm_eps, check_consistency);
            ngram.MakeNGramModel();
            fst = ngram.GetMutableFst();
        }
        else {
            E_FATAL("Bad smoothing method: %s\n", smooth.c_str());
        }
    }
    if (prune != "no") {
        cout << "Pruning model..." << endl;

        if (prune == "count_prune") {
            NGramCountPrune ngramsh(fst, count_pattern,
                                    shrink_opt, total_unigram_count,
                                    backoff_label, norm_eps,
                                    check_consistency);
            ngramsh.ShrinkNGramModel();
        }
        else if (prune == "relative_entropy") {
            NGramRelEntropy ngramsh(fst, theta, shrink_opt,
                                    total_unigram_count, backoff_label,
                                    norm_eps, check_consistency);
            ngramsh.ShrinkNGramModel();
        }
        else if (prune == "seymore") {
            NGramSeymoreShrink ngramsh(fst, theta, shrink_opt,
                                       total_unigram_count, backoff_label,
                                       norm_eps, check_consistency);
            ngramsh.ShrinkNGramModel();
        }
        else {
            E_FATAL("Bad shrink method:  %s\n", prune.c_str());
        }
    }

    cout << "Minimizing model..." << endl;
    MutableFstClass *minimized = new s::MutableFstClass(*fst);
    // `minimized` wraps its own copy of the model, so the FST obtained from
    // Read() is no longer needed.
    // NOTE(review): relies on the NGram* GetMutableFst() accessors returning
    // the pointer they were constructed with — confirm for the OpenGrm
    // version in use.
    delete fst;
    Minimize(minimized, 0, fst::kDelta);
    fst = minimized->GetMutableFst<StdArc>();   // borrowed from `minimized`

    cout << "Correcting final model..." << endl;
    StdMutableFst *out = new StdVectorFst();
    relabel(fst, out, prefix, eps, skip, s1s2_sep, seq_sep);

    cout << "Writing binary model to disk..." << endl;
    out->Write(prefix + ".fst");

    // Release the remaining heap objects; `fst` points into `minimized` and
    // must not be deleted separately.
    delete out;
    delete minimized;
}